blob: 12e418d85ed5d6dacd4d013683229fca725fa094 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in the element's extra block;
   up to this many children need no separate buffer allocation. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the condition below to 1 to keep a
   running byte count in `memory` and print it on every ALLOC/RELEASE;
   in the default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   `type`; under MSVC it additionally requests inlining and the __fastcall
   calling convention. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Macros used to store a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as object pointers must go through
   JOIN_OBJ; see the comments in the ElementObject definition for more
   info.

   JOIN_OBJ(p)        - the pointer with the flag bit masked off
   JOIN_GET(p)        - the flag bit (0 or 1)
   JOIN_SET(p, flag)  - the pointer with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
/* Fetch the per-module state of the given module object, which is assumed
 * to be the _elementtree module.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Locate the module instance imported in the currently running
 * sub-interpreter and fetch its per-module state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: true only when the type of op is exactly Element_Type.
   Element_Check: PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600339 if (attrib_str == NULL) {
340 return NULL;
341 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200355 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
356 Py_DECREF(attrib);
357 attrib = NULL;
358 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300359 } else {
360 attrib = PyDict_New();
361 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700362
363 Py_DECREF(attrib_str);
364
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600365 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
366 Py_DECREF(attrib);
367 return NULL;
368 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300369 return attrib;
370}
371
Serhiy Storchakacb985562015-05-04 15:32:48 +0300372/*[clinic input]
373module _elementtree
374class _elementtree.Element "ElementObject *" "&Element_Type"
375class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
376class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
377[clinic start generated code]*/
378/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
379
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380static int
381element_init(PyObject *self, PyObject *args, PyObject *kwds)
382{
383 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 PyObject *attrib = NULL;
385 ElementObject *self_elem;
386
387 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
388 return -1;
389
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 if (attrib) {
391 /* attrib passed as positional arg */
392 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 if (!attrib)
394 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 if (kwds) {
396 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200397 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300398 return -1;
399 }
400 }
401 } else if (kwds) {
402 /* have keywords args */
403 attrib = get_attrib_from_keywords(kwds);
404 if (!attrib)
405 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 }
407
408 self_elem = (ElementObject *)self;
409
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 return -1;
414 }
415 }
416
Eli Bendersky48d358b2012-05-30 17:57:50 +0300417 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200418 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419
420 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300422 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300425 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
Eli Bendersky092af1f2012-03-04 07:14:03 +0200427 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300428 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429
430 return 0;
431}
432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200434element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200436 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 PyObject* *children;
438
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300439 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 /* make sure self->children can hold the given number of extra
441 elements. set an exception and return -1 if allocation failed */
442
Victor Stinner5f0af232013-07-11 23:01:36 +0200443 if (!self->extra) {
444 if (create_extra(self, NULL) < 0)
445 return -1;
446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200448 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449
450 if (size > self->extra->allocated) {
451 /* use Python 2.4's list growth strategy */
452 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100454 * which needs at least 4 bytes.
455 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000456 * be safe.
457 */
458 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200459 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
460 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000462 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100463 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 * false alarm always assume at least one child to be safe.
465 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000466 children = PyObject_Realloc(self->extra->children,
467 size * sizeof(PyObject*));
468 if (!children)
469 goto nomemory;
470 } else {
471 children = PyObject_Malloc(size * sizeof(PyObject*));
472 if (!children)
473 goto nomemory;
474 /* copy existing children from static area to malloc buffer */
475 memcpy(children, self->extra->children,
476 self->extra->length * sizeof(PyObject*));
477 }
478 self->extra->children = children;
479 self->extra->allocated = size;
480 }
481
482 return 0;
483
484 nomemory:
485 PyErr_NoMemory();
486 return -1;
487}
488
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300489LOCAL(void)
490raise_type_error(PyObject *element)
491{
492 PyErr_Format(PyExc_TypeError,
493 "expected an Element, not \"%.200s\"",
494 Py_TYPE(element)->tp_name);
495}
496
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000497LOCAL(int)
498element_add_subelement(ElementObject* self, PyObject* element)
499{
500 /* add a child element to a parent */
501
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300502 if (!Element_Check(element)) {
503 raise_type_error(element);
504 return -1;
505 }
506
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000507 if (element_resize(self, 1) < 0)
508 return -1;
509
510 Py_INCREF(element);
511 self->extra->children[self->extra->length] = element;
512
513 self->extra->length++;
514
515 return 0;
516}
517
518LOCAL(PyObject*)
519element_get_attrib(ElementObject* self)
520{
521 /* return borrowed reference to attrib dictionary */
522 /* note: this function assumes that the extra section exists */
523
524 PyObject* res = self->extra->attrib;
525
526 if (res == Py_None) {
527 /* create missing dictionary */
528 res = PyDict_New();
529 if (!res)
530 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200531 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532 self->extra->attrib = res;
533 }
534
535 return res;
536}
537
538LOCAL(PyObject*)
539element_get_text(ElementObject* self)
540{
541 /* return borrowed reference to text attribute */
542
Serhiy Storchaka576def02017-03-30 09:47:31 +0300543 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000544
545 if (JOIN_GET(res)) {
546 res = JOIN_OBJ(res);
547 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300548 PyObject *tmp = list_join(res);
549 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300551 self->text = tmp;
552 Py_DECREF(res);
553 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554 }
555 }
556
557 return res;
558}
559
560LOCAL(PyObject*)
561element_get_tail(ElementObject* self)
562{
563 /* return borrowed reference to text attribute */
564
Serhiy Storchaka576def02017-03-30 09:47:31 +0300565 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000566
567 if (JOIN_GET(res)) {
568 res = JOIN_OBJ(res);
569 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300570 PyObject *tmp = list_join(res);
571 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000572 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300573 self->tail = tmp;
574 Py_DECREF(res);
575 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000576 }
577 }
578
579 return res;
580}
581
582static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300583subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584{
585 PyObject* elem;
586
587 ElementObject* parent;
588 PyObject* tag;
589 PyObject* attrib = NULL;
590 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
591 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800592 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000593 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800594 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000595
Eli Bendersky737b1732012-05-29 06:02:56 +0300596 if (attrib) {
597 /* attrib passed as positional arg */
598 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 if (!attrib)
600 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600601 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
602 Py_DECREF(attrib);
603 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300604 }
605 } else if (kwds) {
606 /* have keyword args */
607 attrib = get_attrib_from_keywords(kwds);
608 if (!attrib)
609 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000610 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612 Py_INCREF(Py_None);
613 attrib = Py_None;
614 }
615
Eli Bendersky092af1f2012-03-04 07:14:03 +0200616 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200618 if (elem == NULL)
619 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000620
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000621 if (element_add_subelement(parent, elem) < 0) {
622 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000623 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000624 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625
626 return elem;
627}
628
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629static int
630element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
631{
632 Py_VISIT(self->tag);
633 Py_VISIT(JOIN_OBJ(self->text));
634 Py_VISIT(JOIN_OBJ(self->tail));
635
636 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200637 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300638 Py_VISIT(self->extra->attrib);
639
640 for (i = 0; i < self->extra->length; ++i)
641 Py_VISIT(self->extra->children[i]);
642 }
643 return 0;
644}
645
646static int
647element_gc_clear(ElementObject *self)
648{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300649 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700650 _clear_joined_ptr(&self->text);
651 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300652
653 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300654 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300655 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300656 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657 return 0;
658}
659
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660static void
661element_dealloc(ElementObject* self)
662{
INADA Naokia6296d32017-08-24 14:55:17 +0900663 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300664 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200665 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300666
667 if (self->weakreflist != NULL)
668 PyObject_ClearWeakRefs((PyObject *) self);
669
Eli Bendersky0192ba32012-03-30 16:38:33 +0300670 /* element_gc_clear clears all references and deallocates extra
671 */
672 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
674 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200675 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200676 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677}
678
679/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
Serhiy Storchakacb985562015-05-04 15:32:48 +0300681/*[clinic input]
682_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000683
Serhiy Storchakacb985562015-05-04 15:32:48 +0300684 subelement: object(subclass_of='&Element_Type')
685 /
686
687[clinic start generated code]*/
688
689static PyObject *
690_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
691/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
692{
693 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000694 return NULL;
695
696 Py_RETURN_NONE;
697}
698
Serhiy Storchakacb985562015-05-04 15:32:48 +0300699/*[clinic input]
700_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701
Serhiy Storchakacb985562015-05-04 15:32:48 +0300702[clinic start generated code]*/
703
704static PyObject *
705_elementtree_Element_clear_impl(ElementObject *self)
706/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
707{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300708 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709
710 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300711 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712
713 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300714 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 Py_RETURN_NONE;
717}
718
Serhiy Storchakacb985562015-05-04 15:32:48 +0300719/*[clinic input]
720_elementtree.Element.__copy__
721
722[clinic start generated code]*/
723
724static PyObject *
725_elementtree_Element___copy___impl(ElementObject *self)
726/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200728 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000729 ElementObject* element;
730
Eli Bendersky092af1f2012-03-04 07:14:03 +0200731 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800732 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000733 if (!element)
734 return NULL;
735
Oren Milman39ecb9c2017-10-10 23:26:24 +0300736 Py_INCREF(JOIN_OBJ(self->text));
737 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000738
Oren Milman39ecb9c2017-10-10 23:26:24 +0300739 Py_INCREF(JOIN_OBJ(self->tail));
740 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300742 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000744 if (element_resize(element, self->extra->length) < 0) {
745 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000747 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
749 for (i = 0; i < self->extra->length; i++) {
750 Py_INCREF(self->extra->children[i]);
751 element->extra->children[i] = self->extra->children[i];
752 }
753
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300754 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000756 }
757
758 return (PyObject*) element;
759}
760
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200761/* Helper for a deep copy. */
762LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
763
Serhiy Storchakacb985562015-05-04 15:32:48 +0300764/*[clinic input]
765_elementtree.Element.__deepcopy__
766
Oren Milmand0568182017-09-12 17:39:15 +0300767 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300768 /
769
770[clinic start generated code]*/
771
772static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300773_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
774/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000775{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200776 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000777 ElementObject* element;
778 PyObject* tag;
779 PyObject* attrib;
780 PyObject* text;
781 PyObject* tail;
782 PyObject* id;
783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 tag = deepcopy(self->tag, memo);
785 if (!tag)
786 return NULL;
787
788 if (self->extra) {
789 attrib = deepcopy(self->extra->attrib, memo);
790 if (!attrib) {
791 Py_DECREF(tag);
792 return NULL;
793 }
794 } else {
795 Py_INCREF(Py_None);
796 attrib = Py_None;
797 }
798
Eli Bendersky092af1f2012-03-04 07:14:03 +0200799 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800
801 Py_DECREF(tag);
802 Py_DECREF(attrib);
803
804 if (!element)
805 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100806
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 text = deepcopy(JOIN_OBJ(self->text), memo);
808 if (!text)
809 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300810 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000811
812 tail = deepcopy(JOIN_OBJ(self->tail), memo);
813 if (!tail)
814 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300815 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300817 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 if (element_resize(element, self->extra->length) < 0)
820 goto error;
821
822 for (i = 0; i < self->extra->length; i++) {
823 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300824 if (!child || !Element_Check(child)) {
825 if (child) {
826 raise_type_error(child);
827 Py_DECREF(child);
828 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000829 element->extra->length = i;
830 goto error;
831 }
832 element->extra->children[i] = child;
833 }
834
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300835 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000837 }
838
839 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700840 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000841 if (!id)
842 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843
844 i = PyDict_SetItem(memo, id, (PyObject*) element);
845
846 Py_DECREF(id);
847
848 if (i < 0)
849 goto error;
850
851 return (PyObject*) element;
852
853 error:
854 Py_DECREF(element);
855 return NULL;
856}
857
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200858LOCAL(PyObject *)
859deepcopy(PyObject *object, PyObject *memo)
860{
861 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200862 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200863 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200864
865 /* Fast paths */
866 if (object == Py_None || PyUnicode_CheckExact(object)) {
867 Py_INCREF(object);
868 return object;
869 }
870
871 if (Py_REFCNT(object) == 1) {
872 if (PyDict_CheckExact(object)) {
873 PyObject *key, *value;
874 Py_ssize_t pos = 0;
875 int simple = 1;
876 while (PyDict_Next(object, &pos, &key, &value)) {
877 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
878 simple = 0;
879 break;
880 }
881 }
882 if (simple)
883 return PyDict_Copy(object);
884 /* Fall through to general case */
885 }
886 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300887 return _elementtree_Element___deepcopy___impl(
888 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200889 }
890 }
891
892 /* General case */
893 st = ET_STATE_GLOBAL;
894 if (!st->deepcopy_obj) {
895 PyErr_SetString(PyExc_RuntimeError,
896 "deepcopy helper not found");
897 return NULL;
898 }
899
Victor Stinner7fbac452016-08-20 01:34:44 +0200900 stack[0] = object;
901 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200902 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200903}
904
905
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906/*[clinic input]
907_elementtree.Element.__sizeof__ -> Py_ssize_t
908
909[clinic start generated code]*/
910
911static Py_ssize_t
912_elementtree_Element___sizeof___impl(ElementObject *self)
913/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200914{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200915 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200916 if (self->extra) {
917 result += sizeof(ElementObjectExtra);
918 if (self->extra->children != self->extra->_children)
919 result += sizeof(PyObject*) * self->extra->allocated;
920 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300921 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200922}
923
Eli Bendersky698bdb22013-01-10 06:01:06 -0800924/* dict keys for getstate/setstate. */
925#define PICKLED_TAG "tag"
926#define PICKLED_CHILDREN "_children"
927#define PICKLED_ATTRIB "attrib"
928#define PICKLED_TAIL "tail"
929#define PICKLED_TEXT "text"
930
931/* __getstate__ returns a fabricated instance dict as in the pure-Python
932 * Element implementation, for interoperability/interchangeability. This
933 * makes the pure-Python implementation details an API, but (a) there aren't
934 * any unnecessary structures there; and (b) it buys compatibility with 3.2
935 * pickles. See issue #16076.
936 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300937/*[clinic input]
938_elementtree.Element.__getstate__
939
940[clinic start generated code]*/
941
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300943_elementtree_Element___getstate___impl(ElementObject *self)
944/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200946 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800947 PyObject *instancedict = NULL, *children;
948
949 /* Build a list of children. */
950 children = PyList_New(self->extra ? self->extra->length : 0);
951 if (!children)
952 return NULL;
953 for (i = 0; i < PyList_GET_SIZE(children); i++) {
954 PyObject *child = self->extra->children[i];
955 Py_INCREF(child);
956 PyList_SET_ITEM(children, i, child);
957 }
958
959 /* Construct the state object. */
960 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
961 if (noattrib)
962 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
963 PICKLED_TAG, self->tag,
964 PICKLED_CHILDREN, children,
965 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700966 PICKLED_TEXT, JOIN_OBJ(self->text),
967 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968 else
969 instancedict = Py_BuildValue("{sOsOsOsOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800975 if (instancedict) {
976 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800977 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800978 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800979 else {
980 for (i = 0; i < PyList_GET_SIZE(children); i++)
981 Py_DECREF(PyList_GET_ITEM(children, i));
982 Py_DECREF(children);
983
984 return NULL;
985 }
986}
987
988static PyObject *
989element_setstate_from_attributes(ElementObject *self,
990 PyObject *tag,
991 PyObject *attrib,
992 PyObject *text,
993 PyObject *tail,
994 PyObject *children)
995{
996 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300997 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800998
999 if (!tag) {
1000 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1001 return NULL;
1002 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001004 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001005 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001006
Oren Milman39ecb9c2017-10-10 23:26:24 +03001007 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1008 Py_INCREF(JOIN_OBJ(text));
1009 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Oren Milman39ecb9c2017-10-10 23:26:24 +03001011 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1012 Py_INCREF(JOIN_OBJ(tail));
1013 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001014
1015 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001016 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001018 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001019
1020 /* Compute 'nchildren'. */
1021 if (children) {
1022 if (!PyList_Check(children)) {
1023 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1024 return NULL;
1025 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001026 nchildren = PyList_GET_SIZE(children);
1027
1028 /* (Re-)allocate 'extra'.
1029 Avoid DECREFs calling into this code again (cycles, etc.)
1030 */
1031 oldextra = self->extra;
1032 self->extra = NULL;
1033 if (element_resize(self, nchildren)) {
1034 assert(!self->extra || !self->extra->length);
1035 clear_extra(self);
1036 self->extra = oldextra;
1037 return NULL;
1038 }
1039 assert(self->extra);
1040 assert(self->extra->allocated >= nchildren);
1041 if (oldextra) {
1042 assert(self->extra->attrib == Py_None);
1043 self->extra->attrib = oldextra->attrib;
1044 oldextra->attrib = Py_None;
1045 }
1046
1047 /* Copy children */
1048 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001049 PyObject *child = PyList_GET_ITEM(children, i);
1050 if (!Element_Check(child)) {
1051 raise_type_error(child);
1052 self->extra->length = i;
1053 dealloc_extra(oldextra);
1054 return NULL;
1055 }
1056 Py_INCREF(child);
1057 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001058 }
1059
1060 assert(!self->extra->length);
1061 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001062 }
1063 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001064 if (element_resize(self, 0)) {
1065 return NULL;
1066 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001067 }
1068
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 /* Stash attrib. */
1070 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001071 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001072 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001073 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001074 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001075
1076 Py_RETURN_NONE;
1077}
1078
1079/* __setstate__ for Element instance from the Python implementation.
1080 * 'state' should be the instance dict.
1081 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001082
Eli Bendersky698bdb22013-01-10 06:01:06 -08001083static PyObject *
1084element_setstate_from_Python(ElementObject *self, PyObject *state)
1085{
1086 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1087 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1088 PyObject *args;
1089 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001090 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001091
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092 tag = attrib = text = tail = children = NULL;
1093 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001094 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001095 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001096
1097 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1098 &attrib, &text, &tail, &children))
1099 retval = element_setstate_from_attributes(self, tag, attrib, text,
1100 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001101 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001102 retval = NULL;
1103
1104 Py_DECREF(args);
1105 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001106}
1107
Serhiy Storchakacb985562015-05-04 15:32:48 +03001108/*[clinic input]
1109_elementtree.Element.__setstate__
1110
1111 state: object
1112 /
1113
1114[clinic start generated code]*/
1115
Eli Bendersky698bdb22013-01-10 06:01:06 -08001116static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001117_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1118/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001119{
1120 if (!PyDict_CheckExact(state)) {
1121 PyErr_Format(PyExc_TypeError,
1122 "Don't know how to unpickle \"%.200R\" as an Element",
1123 state);
1124 return NULL;
1125 }
1126 else
1127 return element_setstate_from_Python(self, state);
1128}
1129
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001130LOCAL(int)
1131checkpath(PyObject* tag)
1132{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001133 Py_ssize_t i;
1134 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135
1136 /* check if a tag contains an xpath character */
1137
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001138#define PATHCHAR(ch) \
1139 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001140
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001141 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1143 void *data = PyUnicode_DATA(tag);
1144 unsigned int kind = PyUnicode_KIND(tag);
1145 for (i = 0; i < len; i++) {
1146 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1147 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001150 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001151 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 return 1;
1153 }
1154 return 0;
1155 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001156 if (PyBytes_Check(tag)) {
1157 char *p = PyBytes_AS_STRING(tag);
1158 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159 if (p[i] == '{')
1160 check = 0;
1161 else if (p[i] == '}')
1162 check = 1;
1163 else if (check && PATHCHAR(p[i]))
1164 return 1;
1165 }
1166 return 0;
1167 }
1168
1169 return 1; /* unknown type; might be path expression */
1170}
1171
Serhiy Storchakacb985562015-05-04 15:32:48 +03001172/*[clinic input]
1173_elementtree.Element.extend
1174
1175 elements: object
1176 /
1177
1178[clinic start generated code]*/
1179
1180static PyObject *
1181_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1182/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001183{
1184 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001185 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001186
Serhiy Storchakacb985562015-05-04 15:32:48 +03001187 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188 if (!seq) {
1189 PyErr_Format(
1190 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001191 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001192 );
1193 return NULL;
1194 }
1195
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001196 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001198 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001199 if (element_add_subelement(self, element) < 0) {
1200 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001201 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001202 return NULL;
1203 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001204 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001205 }
1206
1207 Py_DECREF(seq);
1208
1209 Py_RETURN_NONE;
1210}
1211
Serhiy Storchakacb985562015-05-04 15:32:48 +03001212/*[clinic input]
1213_elementtree.Element.find
1214
1215 path: object
1216 namespaces: object = None
1217
1218[clinic start generated code]*/
1219
1220static PyObject *
1221_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1222 PyObject *namespaces)
1223/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001225 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001226 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001227
Serhiy Storchakacb985562015-05-04 15:32:48 +03001228 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001229 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001230 return _PyObject_CallMethodIdObjArgs(
1231 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001233 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001234
1235 if (!self->extra)
1236 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001237
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 for (i = 0; i < self->extra->length; i++) {
1239 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001241 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001243 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001244 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001245 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 Py_DECREF(item);
1247 if (rc < 0)
1248 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 }
1250
1251 Py_RETURN_NONE;
1252}
1253
Serhiy Storchakacb985562015-05-04 15:32:48 +03001254/*[clinic input]
1255_elementtree.Element.findtext
1256
1257 path: object
1258 default: object = None
1259 namespaces: object = None
1260
1261[clinic start generated code]*/
1262
1263static PyObject *
1264_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1265 PyObject *default_value,
1266 PyObject *namespaces)
1267/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001268{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001269 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001270 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001271 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001272
Serhiy Storchakacb985562015-05-04 15:32:48 +03001273 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001274 return _PyObject_CallMethodIdObjArgs(
1275 st->elementpath_obj, &PyId_findtext,
1276 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 );
1278
1279 if (!self->extra) {
1280 Py_INCREF(default_value);
1281 return default_value;
1282 }
1283
1284 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001285 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001286 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001287 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001288 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001289 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001290 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001291 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 if (text == Py_None) {
1293 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001294 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001296 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298 return text;
1299 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001300 Py_DECREF(item);
1301 if (rc < 0)
1302 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
1304
1305 Py_INCREF(default_value);
1306 return default_value;
1307}
1308
Serhiy Storchakacb985562015-05-04 15:32:48 +03001309/*[clinic input]
1310_elementtree.Element.findall
1311
1312 path: object
1313 namespaces: object = None
1314
1315[clinic start generated code]*/
1316
1317static PyObject *
1318_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1319 PyObject *namespaces)
1320/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001321{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001322 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001323 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001324 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001325
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001326 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001327 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001328 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001329 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001330 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001331 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001332
1333 out = PyList_New(0);
1334 if (!out)
1335 return NULL;
1336
1337 if (!self->extra)
1338 return out;
1339
1340 for (i = 0; i < self->extra->length; i++) {
1341 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001342 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001343 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001344 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001345 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001346 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1347 Py_DECREF(item);
1348 Py_DECREF(out);
1349 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001350 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001351 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001352 }
1353
1354 return out;
1355}
1356
Serhiy Storchakacb985562015-05-04 15:32:48 +03001357/*[clinic input]
1358_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001359
Serhiy Storchakacb985562015-05-04 15:32:48 +03001360 path: object
1361 namespaces: object = None
1362
1363[clinic start generated code]*/
1364
1365static PyObject *
1366_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1367 PyObject *namespaces)
1368/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1369{
1370 PyObject* tag = path;
1371 _Py_IDENTIFIER(iterfind);
1372 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001373
Victor Stinnerf5616342016-12-09 15:26:00 +01001374 return _PyObject_CallMethodIdObjArgs(
1375 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001376}
1377
Serhiy Storchakacb985562015-05-04 15:32:48 +03001378/*[clinic input]
1379_elementtree.Element.get
1380
1381 key: object
1382 default: object = None
1383
1384[clinic start generated code]*/
1385
1386static PyObject *
1387_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1388 PyObject *default_value)
1389/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001390{
1391 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001392
1393 if (!self->extra || self->extra->attrib == Py_None)
1394 value = default_value;
1395 else {
1396 value = PyDict_GetItem(self->extra->attrib, key);
1397 if (!value)
1398 value = default_value;
1399 }
1400
1401 Py_INCREF(value);
1402 return value;
1403}
1404
Serhiy Storchakacb985562015-05-04 15:32:48 +03001405/*[clinic input]
1406_elementtree.Element.getchildren
1407
1408[clinic start generated code]*/
1409
1410static PyObject *
1411_elementtree_Element_getchildren_impl(ElementObject *self)
1412/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001413{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001414 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001415 PyObject* list;
1416
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001417 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1418 "This method will be removed in future versions. "
1419 "Use 'list(elem)' or iteration over elem instead.",
1420 1) < 0) {
1421 return NULL;
1422 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001423
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424 if (!self->extra)
1425 return PyList_New(0);
1426
1427 list = PyList_New(self->extra->length);
1428 if (!list)
1429 return NULL;
1430
1431 for (i = 0; i < self->extra->length; i++) {
1432 PyObject* item = self->extra->children[i];
1433 Py_INCREF(item);
1434 PyList_SET_ITEM(list, i, item);
1435 }
1436
1437 return list;
1438}
1439
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001440
Eli Bendersky64d11e62012-06-15 07:42:50 +03001441static PyObject *
1442create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1443
1444
Serhiy Storchakacb985562015-05-04 15:32:48 +03001445/*[clinic input]
1446_elementtree.Element.iter
1447
1448 tag: object = None
1449
1450[clinic start generated code]*/
1451
Eli Bendersky64d11e62012-06-15 07:42:50 +03001452static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001453_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1454/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001455{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001456 if (PyUnicode_Check(tag)) {
1457 if (PyUnicode_READY(tag) < 0)
1458 return NULL;
1459 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1460 tag = Py_None;
1461 }
1462 else if (PyBytes_Check(tag)) {
1463 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1464 tag = Py_None;
1465 }
1466
Eli Bendersky64d11e62012-06-15 07:42:50 +03001467 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001468}
1469
1470
Serhiy Storchakacb985562015-05-04 15:32:48 +03001471/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001472_elementtree.Element.getiterator
1473
1474 tag: object = None
1475
1476[clinic start generated code]*/
1477
1478static PyObject *
1479_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1480/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1481{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001482 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001483 "This method will be removed in future versions. "
1484 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1485 1) < 0) {
1486 return NULL;
1487 }
1488 return _elementtree_Element_iter_impl(self, tag);
1489}
1490
1491
1492/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001493_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001494
Serhiy Storchakacb985562015-05-04 15:32:48 +03001495[clinic start generated code]*/
1496
1497static PyObject *
1498_elementtree_Element_itertext_impl(ElementObject *self)
1499/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1500{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001501 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502}
1503
Eli Bendersky64d11e62012-06-15 07:42:50 +03001504
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001505static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001506element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001508 ElementObject* self = (ElementObject*) self_;
1509
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 if (!self->extra || index < 0 || index >= self->extra->length) {
1511 PyErr_SetString(
1512 PyExc_IndexError,
1513 "child index out of range"
1514 );
1515 return NULL;
1516 }
1517
1518 Py_INCREF(self->extra->children[index]);
1519 return self->extra->children[index];
1520}
1521
Serhiy Storchakacb985562015-05-04 15:32:48 +03001522/*[clinic input]
1523_elementtree.Element.insert
1524
1525 index: Py_ssize_t
1526 subelement: object(subclass_of='&Element_Type')
1527 /
1528
1529[clinic start generated code]*/
1530
1531static PyObject *
1532_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1533 PyObject *subelement)
1534/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001536 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001537
Victor Stinner5f0af232013-07-11 23:01:36 +02001538 if (!self->extra) {
1539 if (create_extra(self, NULL) < 0)
1540 return NULL;
1541 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001543 if (index < 0) {
1544 index += self->extra->length;
1545 if (index < 0)
1546 index = 0;
1547 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548 if (index > self->extra->length)
1549 index = self->extra->length;
1550
1551 if (element_resize(self, 1) < 0)
1552 return NULL;
1553
1554 for (i = self->extra->length; i > index; i--)
1555 self->extra->children[i] = self->extra->children[i-1];
1556
Serhiy Storchakacb985562015-05-04 15:32:48 +03001557 Py_INCREF(subelement);
1558 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559
1560 self->extra->length++;
1561
1562 Py_RETURN_NONE;
1563}
1564
Serhiy Storchakacb985562015-05-04 15:32:48 +03001565/*[clinic input]
1566_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567
Serhiy Storchakacb985562015-05-04 15:32:48 +03001568[clinic start generated code]*/
1569
1570static PyObject *
1571_elementtree_Element_items_impl(ElementObject *self)
1572/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1573{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 if (!self->extra || self->extra->attrib == Py_None)
1575 return PyList_New(0);
1576
1577 return PyDict_Items(self->extra->attrib);
1578}
1579
Serhiy Storchakacb985562015-05-04 15:32:48 +03001580/*[clinic input]
1581_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582
Serhiy Storchakacb985562015-05-04 15:32:48 +03001583[clinic start generated code]*/
1584
1585static PyObject *
1586_elementtree_Element_keys_impl(ElementObject *self)
1587/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1588{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 if (!self->extra || self->extra->attrib == Py_None)
1590 return PyList_New(0);
1591
1592 return PyDict_Keys(self->extra->attrib);
1593}
1594
Martin v. Löwis18e16552006-02-15 17:27:45 +00001595static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596element_length(ElementObject* self)
1597{
1598 if (!self->extra)
1599 return 0;
1600
1601 return self->extra->length;
1602}
1603
Serhiy Storchakacb985562015-05-04 15:32:48 +03001604/*[clinic input]
1605_elementtree.Element.makeelement
1606
1607 tag: object
1608 attrib: object
1609 /
1610
1611[clinic start generated code]*/
1612
1613static PyObject *
1614_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1615 PyObject *attrib)
1616/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617{
1618 PyObject* elem;
1619
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620 attrib = PyDict_Copy(attrib);
1621 if (!attrib)
1622 return NULL;
1623
Eli Bendersky092af1f2012-03-04 07:14:03 +02001624 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625
1626 Py_DECREF(attrib);
1627
1628 return elem;
1629}
1630
Serhiy Storchakacb985562015-05-04 15:32:48 +03001631/*[clinic input]
1632_elementtree.Element.remove
1633
1634 subelement: object(subclass_of='&Element_Type')
1635 /
1636
1637[clinic start generated code]*/
1638
1639static PyObject *
1640_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1641/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001643 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001644 int rc;
1645 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001646
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001647 if (!self->extra) {
1648 /* element has no children, so raise exception */
1649 PyErr_SetString(
1650 PyExc_ValueError,
1651 "list.remove(x): x not in list"
1652 );
1653 return NULL;
1654 }
1655
1656 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001657 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001659 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001660 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001661 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001662 if (rc < 0)
1663 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664 }
1665
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001666 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001667 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668 PyErr_SetString(
1669 PyExc_ValueError,
1670 "list.remove(x): x not in list"
1671 );
1672 return NULL;
1673 }
1674
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001675 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001676
1677 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001678 for (; i < self->extra->length; i++)
1679 self->extra->children[i] = self->extra->children[i+1];
1680
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001681 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001682 Py_RETURN_NONE;
1683}
1684
1685static PyObject*
1686element_repr(ElementObject* self)
1687{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001688 int status;
1689
1690 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001691 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001692
1693 status = Py_ReprEnter((PyObject *)self);
1694 if (status == 0) {
1695 PyObject *res;
1696 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1697 Py_ReprLeave((PyObject *)self);
1698 return res;
1699 }
1700 if (status > 0)
1701 PyErr_Format(PyExc_RuntimeError,
1702 "reentrant call inside %s.__repr__",
1703 Py_TYPE(self)->tp_name);
1704 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001705}
1706
Serhiy Storchakacb985562015-05-04 15:32:48 +03001707/*[clinic input]
1708_elementtree.Element.set
1709
1710 key: object
1711 value: object
1712 /
1713
1714[clinic start generated code]*/
1715
1716static PyObject *
1717_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1718 PyObject *value)
1719/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001720{
1721 PyObject* attrib;
1722
Victor Stinner5f0af232013-07-11 23:01:36 +02001723 if (!self->extra) {
1724 if (create_extra(self, NULL) < 0)
1725 return NULL;
1726 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727
1728 attrib = element_get_attrib(self);
1729 if (!attrib)
1730 return NULL;
1731
1732 if (PyDict_SetItem(attrib, key, value) < 0)
1733 return NULL;
1734
1735 Py_RETURN_NONE;
1736}
1737
1738static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001739element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001740{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001741 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001743 PyObject* old;
1744
1745 if (!self->extra || index < 0 || index >= self->extra->length) {
1746 PyErr_SetString(
1747 PyExc_IndexError,
1748 "child assignment index out of range");
1749 return -1;
1750 }
1751
1752 old = self->extra->children[index];
1753
1754 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001755 if (!Element_Check(item)) {
1756 raise_type_error(item);
1757 return -1;
1758 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001759 Py_INCREF(item);
1760 self->extra->children[index] = item;
1761 } else {
1762 self->extra->length--;
1763 for (i = index; i < self->extra->length; i++)
1764 self->extra->children[i] = self->extra->children[i+1];
1765 }
1766
1767 Py_DECREF(old);
1768
1769 return 0;
1770}
1771
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772static PyObject*
1773element_subscr(PyObject* self_, PyObject* item)
1774{
1775 ElementObject* self = (ElementObject*) self_;
1776
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001777 if (PyIndex_Check(item)) {
1778 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779
1780 if (i == -1 && PyErr_Occurred()) {
1781 return NULL;
1782 }
1783 if (i < 0 && self->extra)
1784 i += self->extra->length;
1785 return element_getitem(self_, i);
1786 }
1787 else if (PySlice_Check(item)) {
1788 Py_ssize_t start, stop, step, slicelen, cur, i;
1789 PyObject* list;
1790
1791 if (!self->extra)
1792 return PyList_New(0);
1793
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001794 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795 return NULL;
1796 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001797 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1798 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799
1800 if (slicelen <= 0)
1801 return PyList_New(0);
1802 else {
1803 list = PyList_New(slicelen);
1804 if (!list)
1805 return NULL;
1806
1807 for (cur = start, i = 0; i < slicelen;
1808 cur += step, i++) {
1809 PyObject* item = self->extra->children[cur];
1810 Py_INCREF(item);
1811 PyList_SET_ITEM(list, i, item);
1812 }
1813
1814 return list;
1815 }
1816 }
1817 else {
1818 PyErr_SetString(PyExc_TypeError,
1819 "element indices must be integers");
1820 return NULL;
1821 }
1822}
1823
1824static int
1825element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1826{
1827 ElementObject* self = (ElementObject*) self_;
1828
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001829 if (PyIndex_Check(item)) {
1830 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001831
1832 if (i == -1 && PyErr_Occurred()) {
1833 return -1;
1834 }
1835 if (i < 0 && self->extra)
1836 i += self->extra->length;
1837 return element_setitem(self_, i, value);
1838 }
1839 else if (PySlice_Check(item)) {
1840 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1841
1842 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001843 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001844
Victor Stinner5f0af232013-07-11 23:01:36 +02001845 if (!self->extra) {
1846 if (create_extra(self, NULL) < 0)
1847 return -1;
1848 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001849
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001850 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001851 return -1;
1852 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001853 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1854 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855
Eli Bendersky865756a2012-03-09 13:38:15 +02001856 if (value == NULL) {
1857 /* Delete slice */
1858 size_t cur;
1859 Py_ssize_t i;
1860
1861 if (slicelen <= 0)
1862 return 0;
1863
1864 /* Since we're deleting, the direction of the range doesn't matter,
1865 * so for simplicity make it always ascending.
1866 */
1867 if (step < 0) {
1868 stop = start + 1;
1869 start = stop + step * (slicelen - 1) - 1;
1870 step = -step;
1871 }
1872
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001873 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001874
1875 /* recycle is a list that will contain all the children
1876 * scheduled for removal.
1877 */
1878 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001879 return -1;
1880 }
1881
1882 /* This loop walks over all the children that have to be deleted,
1883 * with cur pointing at them. num_moved is the amount of children
1884 * until the next deleted child that have to be "shifted down" to
1885 * occupy the deleted's places.
1886 * Note that in the ith iteration, shifting is done i+i places down
1887 * because i children were already removed.
1888 */
1889 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1890 /* Compute how many children have to be moved, clipping at the
1891 * list end.
1892 */
1893 Py_ssize_t num_moved = step - 1;
1894 if (cur + step >= (size_t)self->extra->length) {
1895 num_moved = self->extra->length - cur - 1;
1896 }
1897
1898 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1899
1900 memmove(
1901 self->extra->children + cur - i,
1902 self->extra->children + cur + 1,
1903 num_moved * sizeof(PyObject *));
1904 }
1905
1906 /* Leftover "tail" after the last removed child */
1907 cur = start + (size_t)slicelen * step;
1908 if (cur < (size_t)self->extra->length) {
1909 memmove(
1910 self->extra->children + cur - slicelen,
1911 self->extra->children + cur,
1912 (self->extra->length - cur) * sizeof(PyObject *));
1913 }
1914
1915 self->extra->length -= slicelen;
1916
1917 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001918 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001919 return 0;
1920 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001921
1922 /* A new slice is actually being assigned */
1923 seq = PySequence_Fast(value, "");
1924 if (!seq) {
1925 PyErr_Format(
1926 PyExc_TypeError,
1927 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1928 );
1929 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001930 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001931 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001932
1933 if (step != 1 && newlen != slicelen)
1934 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001935 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001936 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001937 "attempt to assign sequence of size %zd "
1938 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001939 newlen, slicelen
1940 );
1941 return -1;
1942 }
1943
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001944 /* Resize before creating the recycle bin, to prevent refleaks. */
1945 if (newlen > slicelen) {
1946 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001947 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001948 return -1;
1949 }
1950 }
1951
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001952 for (i = 0; i < newlen; i++) {
1953 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1954 if (!Element_Check(element)) {
1955 raise_type_error(element);
1956 Py_DECREF(seq);
1957 return -1;
1958 }
1959 }
1960
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001961 if (slicelen > 0) {
1962 /* to avoid recursive calls to this method (via decref), move
1963 old items to the recycle bin here, and get rid of them when
1964 we're done modifying the element */
1965 recycle = PyList_New(slicelen);
1966 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001967 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001968 return -1;
1969 }
1970 for (cur = start, i = 0; i < slicelen;
1971 cur += step, i++)
1972 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1973 }
1974
1975 if (newlen < slicelen) {
1976 /* delete slice */
1977 for (i = stop; i < self->extra->length; i++)
1978 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1979 } else if (newlen > slicelen) {
1980 /* insert slice */
1981 for (i = self->extra->length-1; i >= stop; i--)
1982 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1983 }
1984
1985 /* replace the slice */
1986 for (cur = start, i = 0; i < newlen;
1987 cur += step, i++) {
1988 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1989 Py_INCREF(element);
1990 self->extra->children[cur] = element;
1991 }
1992
1993 self->extra->length += newlen - slicelen;
1994
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001995 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001996
1997 /* discard the recycle bin, and everything in it */
1998 Py_XDECREF(recycle);
1999
2000 return 0;
2001 }
2002 else {
2003 PyErr_SetString(PyExc_TypeError,
2004 "element indices must be integers");
2005 return -1;
2006 }
2007}
2008
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002009static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002010element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002011{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002012 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002013 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002014 return res;
2015}
2016
Serhiy Storchakadde08152015-11-25 15:28:13 +02002017static PyObject*
2018element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002019{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002020 PyObject *res = element_get_text(self);
2021 Py_XINCREF(res);
2022 return res;
2023}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002024
Serhiy Storchakadde08152015-11-25 15:28:13 +02002025static PyObject*
2026element_tail_getter(ElementObject *self, void *closure)
2027{
2028 PyObject *res = element_get_tail(self);
2029 Py_XINCREF(res);
2030 return res;
2031}
2032
2033static PyObject*
2034element_attrib_getter(ElementObject *self, void *closure)
2035{
2036 PyObject *res;
2037 if (!self->extra) {
2038 if (create_extra(self, NULL) < 0)
2039 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002040 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002041 res = element_get_attrib(self);
2042 Py_XINCREF(res);
2043 return res;
2044}
Victor Stinner4d463432013-07-11 23:05:03 +02002045
/* macro for setter validation: a NULL value means "delete the attribute",
   which is rejected with AttributeError by returning -1 from the enclosing
   setter.  Wrapped in do { } while (0) so that it behaves as a single
   statement (safe inside an unbraced if/else); use it with a trailing
   semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2054
Serhiy Storchakadde08152015-11-25 15:28:13 +02002055static int
2056element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2057{
2058 _VALIDATE_ATTR_VALUE(value);
2059 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002060 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002061 return 0;
2062}
2063
2064static int
2065element_text_setter(ElementObject *self, PyObject *value, void *closure)
2066{
2067 _VALIDATE_ATTR_VALUE(value);
2068 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002069 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002070 return 0;
2071}
2072
2073static int
2074element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2075{
2076 _VALIDATE_ATTR_VALUE(value);
2077 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002078 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002079 return 0;
2080}
2081
2082static int
2083element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2084{
2085 _VALIDATE_ATTR_VALUE(value);
2086 if (!self->extra) {
2087 if (create_extra(self, NULL) < 0)
2088 return -1;
2089 }
2090 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002091 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002092 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002093}
2094
2095static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002096 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002097 0, /* sq_concat */
2098 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002099 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002100 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002101 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002102 0,
2103};
2104
Eli Bendersky64d11e62012-06-15 07:42:50 +03002105/******************************* Element iterator ****************************/
2106
2107/* ElementIterObject represents the iteration state over an XML element in
2108 * pre-order traversal. To keep track of which sub-element should be returned
2109 * next, a stack of parents is maintained. This is a standard stack-based
2110 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002111 * The stack is managed using a continuous array.
2112 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002113 * the current one is exhausted, and the next child to examine in that parent.
2114 */
2115typedef struct ParentLocator_t {
2116 ElementObject *parent;
2117 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118} ParentLocator;
2119
2120typedef struct {
2121 PyObject_HEAD
2122 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002123 Py_ssize_t parent_stack_used;
2124 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002125 ElementObject *root_element;
2126 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002127 int gettext;
2128} ElementIterObject;
2129
2130
2131static void
2132elementiter_dealloc(ElementIterObject *it)
2133{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002134 Py_ssize_t i = it->parent_stack_used;
2135 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002136 /* bpo-31095: UnTrack is needed before calling any callbacks */
2137 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002138 while (i--)
2139 Py_XDECREF(it->parent_stack[i].parent);
2140 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002141
2142 Py_XDECREF(it->sought_tag);
2143 Py_XDECREF(it->root_element);
2144
Eli Bendersky64d11e62012-06-15 07:42:50 +03002145 PyObject_GC_Del(it);
2146}
2147
2148static int
2149elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2150{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151 Py_ssize_t i = it->parent_stack_used;
2152 while (i--)
2153 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154
2155 Py_VISIT(it->root_element);
2156 Py_VISIT(it->sought_tag);
2157 return 0;
2158}
2159
2160/* Helper function for elementiter_next. Add a new parent to the parent stack.
2161 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002162static int
2163parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002164{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002165 ParentLocator *item;
2166
2167 if (it->parent_stack_used >= it->parent_stack_size) {
2168 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2169 ParentLocator *parent_stack = it->parent_stack;
2170 PyMem_Resize(parent_stack, ParentLocator, new_size);
2171 if (parent_stack == NULL)
2172 return -1;
2173 it->parent_stack = parent_stack;
2174 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002175 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 item = it->parent_stack + it->parent_stack_used++;
2177 Py_INCREF(parent);
2178 item->parent = parent;
2179 item->child_index = 0;
2180 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002181}
2182
2183static PyObject *
2184elementiter_next(ElementIterObject *it)
2185{
2186 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002187 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 * A short note on gettext: this function serves both the iter() and
2189 * itertext() methods to avoid code duplication. However, there are a few
2190 * small differences in the way these iterations work. Namely:
2191 * - itertext() only yields text from nodes that have it, and continues
2192 * iterating when a node doesn't have text (so it doesn't return any
2193 * node like iter())
2194 * - itertext() also has to handle tail, after finishing with all the
2195 * children of a node.
2196 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002197 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002198 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002199 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200
2201 while (1) {
2202 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002203 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002204 * iterator is exhausted.
2205 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002206 if (!it->parent_stack_used) {
2207 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002208 PyErr_SetNone(PyExc_StopIteration);
2209 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002210 }
2211
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002212 elem = it->root_element; /* steals a reference */
2213 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002214 }
2215 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002216 /* See if there are children left to traverse in the current parent. If
2217 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002218 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002219 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2220 Py_ssize_t child_index = item->child_index;
2221 ElementObjectExtra *extra;
2222 elem = item->parent;
2223 extra = elem->extra;
2224 if (!extra || child_index >= extra->length) {
2225 it->parent_stack_used--;
2226 /* Note that extra condition on it->parent_stack_used here;
2227 * this is because itertext() is supposed to only return *inner*
2228 * text, not text following the element it began iteration with.
2229 */
2230 if (it->gettext && it->parent_stack_used) {
2231 text = element_get_tail(elem);
2232 goto gettext;
2233 }
2234 Py_DECREF(elem);
2235 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002236 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002237
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002238 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002239 elem = (ElementObject *)extra->children[child_index];
2240 item->child_index++;
2241 Py_INCREF(elem);
2242 }
2243
2244 if (parent_stack_push_new(it, elem) < 0) {
2245 Py_DECREF(elem);
2246 PyErr_NoMemory();
2247 return NULL;
2248 }
2249 if (it->gettext) {
2250 text = element_get_text(elem);
2251 goto gettext;
2252 }
2253
2254 if (it->sought_tag == Py_None)
2255 return (PyObject *)elem;
2256
2257 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2258 if (rc > 0)
2259 return (PyObject *)elem;
2260
2261 Py_DECREF(elem);
2262 if (rc < 0)
2263 return NULL;
2264 continue;
2265
2266gettext:
2267 if (!text) {
2268 Py_DECREF(elem);
2269 return NULL;
2270 }
2271 if (text == Py_None) {
2272 Py_DECREF(elem);
2273 }
2274 else {
2275 Py_INCREF(text);
2276 Py_DECREF(elem);
2277 rc = PyObject_IsTrue(text);
2278 if (rc > 0)
2279 return text;
2280 Py_DECREF(text);
2281 if (rc < 0)
2282 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002283 }
2284 }
2285
2286 return NULL;
2287}
2288
2289
2290static PyTypeObject ElementIter_Type = {
2291 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002292 /* Using the module's name since the pure-Python implementation does not
2293 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 "_elementtree._element_iterator", /* tp_name */
2295 sizeof(ElementIterObject), /* tp_basicsize */
2296 0, /* tp_itemsize */
2297 /* methods */
2298 (destructor)elementiter_dealloc, /* tp_dealloc */
2299 0, /* tp_print */
2300 0, /* tp_getattr */
2301 0, /* tp_setattr */
2302 0, /* tp_reserved */
2303 0, /* tp_repr */
2304 0, /* tp_as_number */
2305 0, /* tp_as_sequence */
2306 0, /* tp_as_mapping */
2307 0, /* tp_hash */
2308 0, /* tp_call */
2309 0, /* tp_str */
2310 0, /* tp_getattro */
2311 0, /* tp_setattro */
2312 0, /* tp_as_buffer */
2313 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2314 0, /* tp_doc */
2315 (traverseproc)elementiter_traverse, /* tp_traverse */
2316 0, /* tp_clear */
2317 0, /* tp_richcompare */
2318 0, /* tp_weaklistoffset */
2319 PyObject_SelfIter, /* tp_iter */
2320 (iternextfunc)elementiter_next, /* tp_iternext */
2321 0, /* tp_methods */
2322 0, /* tp_members */
2323 0, /* tp_getset */
2324 0, /* tp_base */
2325 0, /* tp_dict */
2326 0, /* tp_descr_get */
2327 0, /* tp_descr_set */
2328 0, /* tp_dictoffset */
2329 0, /* tp_init */
2330 0, /* tp_alloc */
2331 0, /* tp_new */
2332};
2333
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002334#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002335
2336static PyObject *
2337create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2338{
2339 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002340
2341 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2342 if (!it)
2343 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002344
Victor Stinner4d463432013-07-11 23:05:03 +02002345 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002346 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002347 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002348 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002349 it->root_element = self;
2350
Eli Bendersky64d11e62012-06-15 07:42:50 +03002351 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002352
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002353 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002354 if (it->parent_stack == NULL) {
2355 Py_DECREF(it);
2356 PyErr_NoMemory();
2357 return NULL;
2358 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002359 it->parent_stack_used = 0;
2360 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002361
Eli Bendersky64d11e62012-06-15 07:42:50 +03002362 return (PyObject *)it;
2363}
2364
2365
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366/* ==================================================================== */
2367/* the tree builder type */
2368
2369typedef struct {
2370 PyObject_HEAD
2371
Eli Bendersky58d548d2012-05-29 15:45:16 +03002372 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373
Antoine Pitrouee329312012-10-04 19:53:29 +02002374 PyObject *this; /* current node */
2375 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002376
Eli Bendersky58d548d2012-05-29 15:45:16 +03002377 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002378
Eli Bendersky58d548d2012-05-29 15:45:16 +03002379 PyObject *stack; /* element stack */
2380 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 PyObject *element_factory;
2383
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002385 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002386 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2387 PyObject *end_event_obj;
2388 PyObject *start_ns_event_obj;
2389 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390} TreeBuilderObject;
2391
Christian Heimes90aa7642007-12-19 02:45:37 +00002392#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393
2394/* -------------------------------------------------------------------- */
2395/* constructor and destructor */
2396
Eli Bendersky58d548d2012-05-29 15:45:16 +03002397static PyObject *
2398treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002399{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002400 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2401 if (t != NULL) {
2402 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403
Eli Bendersky58d548d2012-05-29 15:45:16 +03002404 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002405 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002406 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002407 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002408
Eli Bendersky58d548d2012-05-29 15:45:16 +03002409 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002410 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002411 t->stack = PyList_New(20);
2412 if (!t->stack) {
2413 Py_DECREF(t->this);
2414 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002415 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002416 return NULL;
2417 }
2418 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002419
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002420 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002421 t->start_event_obj = t->end_event_obj = NULL;
2422 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2423 }
2424 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425}
2426
Serhiy Storchakacb985562015-05-04 15:32:48 +03002427/*[clinic input]
2428_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002429
Serhiy Storchakacb985562015-05-04 15:32:48 +03002430 element_factory: object = NULL
2431
2432[clinic start generated code]*/
2433
2434static int
2435_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2436 PyObject *element_factory)
2437/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2438{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002439 if (element_factory) {
2440 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002441 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002442 }
2443
Eli Bendersky58d548d2012-05-29 15:45:16 +03002444 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002445}
2446
Eli Bendersky48d358b2012-05-30 17:57:50 +03002447static int
2448treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2449{
2450 Py_VISIT(self->root);
2451 Py_VISIT(self->this);
2452 Py_VISIT(self->last);
2453 Py_VISIT(self->data);
2454 Py_VISIT(self->stack);
2455 Py_VISIT(self->element_factory);
2456 return 0;
2457}
2458
2459static int
2460treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002462 Py_CLEAR(self->end_ns_event_obj);
2463 Py_CLEAR(self->start_ns_event_obj);
2464 Py_CLEAR(self->end_event_obj);
2465 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002466 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002467 Py_CLEAR(self->stack);
2468 Py_CLEAR(self->data);
2469 Py_CLEAR(self->last);
2470 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002471 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002472 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002473 return 0;
2474}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475
Eli Bendersky48d358b2012-05-30 17:57:50 +03002476static void
2477treebuilder_dealloc(TreeBuilderObject *self)
2478{
2479 PyObject_GC_UnTrack(self);
2480 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002481 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002482}
2483
2484/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002485/* helpers for handling of arbitrary element-like objects */
2486
2487static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002488treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002489 PyObject **dest, _Py_Identifier *name)
2490{
2491 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002492 PyObject *tmp = JOIN_OBJ(*dest);
2493 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2494 *data = NULL;
2495 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002496 return 0;
2497 }
2498 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002499 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002500 int r;
2501 if (joined == NULL)
2502 return -1;
2503 r = _PyObject_SetAttrId(element, name, joined);
2504 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002505 if (r < 0)
2506 return -1;
2507 Py_CLEAR(*data);
2508 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002509 }
2510}
2511
Serhiy Storchaka576def02017-03-30 09:47:31 +03002512LOCAL(int)
2513treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002514{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002515 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002516
Serhiy Storchaka576def02017-03-30 09:47:31 +03002517 if (!self->data) {
2518 return 0;
2519 }
2520
2521 if (self->this == element) {
2522 _Py_IDENTIFIER(text);
2523 return treebuilder_set_element_text_or_tail(
2524 element, &self->data,
2525 &((ElementObject *) element)->text, &PyId_text);
2526 }
2527 else {
2528 _Py_IDENTIFIER(tail);
2529 return treebuilder_set_element_text_or_tail(
2530 element, &self->data,
2531 &((ElementObject *) element)->tail, &PyId_tail);
2532 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002533}
2534
2535static int
2536treebuilder_add_subelement(PyObject *element, PyObject *child)
2537{
2538 _Py_IDENTIFIER(append);
2539 if (Element_CheckExact(element)) {
2540 ElementObject *elem = (ElementObject *) element;
2541 return element_add_subelement(elem, child);
2542 }
2543 else {
2544 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002545 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002546 if (res == NULL)
2547 return -1;
2548 Py_DECREF(res);
2549 return 0;
2550 }
2551}
2552
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002553LOCAL(int)
2554treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2555 PyObject *node)
2556{
2557 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002558 PyObject *res;
2559 PyObject *event = PyTuple_Pack(2, action, node);
2560 if (event == NULL)
2561 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002562 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002563 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002564 if (res == NULL)
2565 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002566 Py_DECREF(res);
2567 }
2568 return 0;
2569}
2570
Antoine Pitrouee329312012-10-04 19:53:29 +02002571/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572/* handlers */
2573
2574LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2576 PyObject* attrib)
2577{
2578 PyObject* node;
2579 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002580 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581
Serhiy Storchaka576def02017-03-30 09:47:31 +03002582 if (treebuilder_flush_data(self) < 0) {
2583 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002584 }
2585
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002586 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002587 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002588 } else if (attrib == Py_None) {
2589 attrib = PyDict_New();
2590 if (!attrib)
2591 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002592 node = PyObject_CallFunctionObjArgs(self->element_factory,
2593 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002594 Py_DECREF(attrib);
2595 }
2596 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002597 node = PyObject_CallFunctionObjArgs(self->element_factory,
2598 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002599 }
2600 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002602 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603
Antoine Pitrouee329312012-10-04 19:53:29 +02002604 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605
2606 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002607 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002608 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002609 } else {
2610 if (self->root) {
2611 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002612 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613 "multiple elements on top level"
2614 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002615 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616 }
2617 Py_INCREF(node);
2618 self->root = node;
2619 }
2620
2621 if (self->index < PyList_GET_SIZE(self->stack)) {
2622 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002623 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624 Py_INCREF(this);
2625 } else {
2626 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002627 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628 }
2629 self->index++;
2630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002632 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002634 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002636 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2637 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638
2639 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002640
2641 error:
2642 Py_DECREF(node);
2643 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644}
2645
2646LOCAL(PyObject*)
2647treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2648{
2649 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002650 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002651 /* ignore calls to data before the first call to start */
2652 Py_RETURN_NONE;
2653 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654 /* store the first item as is */
2655 Py_INCREF(data); self->data = data;
2656 } else {
2657 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002658 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2659 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002660 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 /* expat often generates single character data sections; handle
2662 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002663 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2664 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002665 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002666 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667 } else if (PyList_CheckExact(self->data)) {
2668 if (PyList_Append(self->data, data) < 0)
2669 return NULL;
2670 } else {
2671 PyObject* list = PyList_New(2);
2672 if (!list)
2673 return NULL;
2674 PyList_SET_ITEM(list, 0, self->data);
2675 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2676 self->data = list;
2677 }
2678 }
2679
2680 Py_RETURN_NONE;
2681}
2682
2683LOCAL(PyObject*)
2684treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2685{
2686 PyObject* item;
2687
Serhiy Storchaka576def02017-03-30 09:47:31 +03002688 if (treebuilder_flush_data(self) < 0) {
2689 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690 }
2691
2692 if (self->index == 0) {
2693 PyErr_SetString(
2694 PyExc_IndexError,
2695 "pop from empty stack"
2696 );
2697 return NULL;
2698 }
2699
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002700 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002701 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002702 self->index--;
2703 self->this = PyList_GET_ITEM(self->stack, self->index);
2704 Py_INCREF(self->this);
2705 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002707 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2708 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709
2710 Py_INCREF(self->last);
2711 return (PyObject*) self->last;
2712}
2713
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714/* -------------------------------------------------------------------- */
2715/* methods (in alphabetical order) */
2716
Serhiy Storchakacb985562015-05-04 15:32:48 +03002717/*[clinic input]
2718_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719
Serhiy Storchakacb985562015-05-04 15:32:48 +03002720 data: object
2721 /
2722
2723[clinic start generated code]*/
2724
2725static PyObject *
2726_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2727/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2728{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729 return treebuilder_handle_data(self, data);
2730}
2731
Serhiy Storchakacb985562015-05-04 15:32:48 +03002732/*[clinic input]
2733_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734
Serhiy Storchakacb985562015-05-04 15:32:48 +03002735 tag: object
2736 /
2737
2738[clinic start generated code]*/
2739
2740static PyObject *
2741_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2742/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2743{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744 return treebuilder_handle_end(self, tag);
2745}
2746
2747LOCAL(PyObject*)
2748treebuilder_done(TreeBuilderObject* self)
2749{
2750 PyObject* res;
2751
2752 /* FIXME: check stack size? */
2753
2754 if (self->root)
2755 res = self->root;
2756 else
2757 res = Py_None;
2758
2759 Py_INCREF(res);
2760 return res;
2761}
2762
Serhiy Storchakacb985562015-05-04 15:32:48 +03002763/*[clinic input]
2764_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765
Serhiy Storchakacb985562015-05-04 15:32:48 +03002766[clinic start generated code]*/
2767
2768static PyObject *
2769_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2770/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2771{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772 return treebuilder_done(self);
2773}
2774
Serhiy Storchakacb985562015-05-04 15:32:48 +03002775/*[clinic input]
2776_elementtree.TreeBuilder.start
2777
2778 tag: object
2779 attrs: object = None
2780 /
2781
2782[clinic start generated code]*/
2783
2784static PyObject *
2785_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2786 PyObject *attrs)
2787/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002789 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002790}
2791
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792/* ==================================================================== */
2793/* the expat interface */
2794
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002797
2798/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2799 * cached globally without being in per-module state.
2800 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002801static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803
Eli Bendersky52467b12012-06-01 07:13:08 +03002804static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2805 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2806
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807typedef struct {
2808 PyObject_HEAD
2809
2810 XML_Parser parser;
2811
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002812 PyObject *target;
2813 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002815 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002817 PyObject *handle_start;
2818 PyObject *handle_data;
2819 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002821 PyObject *handle_comment;
2822 PyObject *handle_pi;
2823 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002825 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827} XMLParserObject;
2828
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829/* helpers */
2830
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831LOCAL(PyObject*)
2832makeuniversal(XMLParserObject* self, const char* string)
2833{
2834 /* convert a UTF-8 tag/attribute name from the expat parser
2835 to a universal name string */
2836
Antoine Pitrouc1948842012-10-01 23:40:37 +02002837 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002838 PyObject* key;
2839 PyObject* value;
2840
2841 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002842 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002843 if (!key)
2844 return NULL;
2845
2846 value = PyDict_GetItem(self->names, key);
2847
2848 if (value) {
2849 Py_INCREF(value);
2850 } else {
2851 /* new name. convert to universal name, and decode as
2852 necessary */
2853
2854 PyObject* tag;
2855 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002856 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002857
2858 /* look for namespace separator */
2859 for (i = 0; i < size; i++)
2860 if (string[i] == '}')
2861 break;
2862 if (i != size) {
2863 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002864 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002865 if (tag == NULL) {
2866 Py_DECREF(key);
2867 return NULL;
2868 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002869 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002870 p[0] = '{';
2871 memcpy(p+1, string, size);
2872 size++;
2873 } else {
2874 /* plain name; use key as tag */
2875 Py_INCREF(key);
2876 tag = key;
2877 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002878
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002879 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002880 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002881 value = PyUnicode_DecodeUTF8(p, size, "strict");
2882 Py_DECREF(tag);
2883 if (!value) {
2884 Py_DECREF(key);
2885 return NULL;
2886 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887
2888 /* add to names dictionary */
2889 if (PyDict_SetItem(self->names, key, value) < 0) {
2890 Py_DECREF(key);
2891 Py_DECREF(value);
2892 return NULL;
2893 }
2894 }
2895
2896 Py_DECREF(key);
2897 return value;
2898}
2899
Eli Bendersky5b77d812012-03-16 08:20:05 +02002900/* Set the ParseError exception with the given parameters.
2901 * If message is not NULL, it's used as the error string. Otherwise, the
2902 * message string is the default for the given error_code.
2903*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002904static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002905expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2906 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002907{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002908 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002909 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002910
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002911 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002912 message ? message : EXPAT(ErrorString)(error_code),
2913 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002914 if (errmsg == NULL)
2915 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002916
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002917 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002918 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002919 if (!error)
2920 return;
2921
Eli Bendersky5b77d812012-03-16 08:20:05 +02002922 /* Add code and position attributes */
2923 code = PyLong_FromLong((long)error_code);
2924 if (!code) {
2925 Py_DECREF(error);
2926 return;
2927 }
2928 if (PyObject_SetAttrString(error, "code", code) == -1) {
2929 Py_DECREF(error);
2930 Py_DECREF(code);
2931 return;
2932 }
2933 Py_DECREF(code);
2934
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002935 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002936 if (!position) {
2937 Py_DECREF(error);
2938 return;
2939 }
2940 if (PyObject_SetAttrString(error, "position", position) == -1) {
2941 Py_DECREF(error);
2942 Py_DECREF(position);
2943 return;
2944 }
2945 Py_DECREF(position);
2946
Eli Bendersky532d03e2013-08-10 08:00:39 -07002947 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002948 Py_DECREF(error);
2949}
2950
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002951/* -------------------------------------------------------------------- */
2952/* handlers */
2953
2954static void
2955expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2956 int data_len)
2957{
2958 PyObject* key;
2959 PyObject* value;
2960 PyObject* res;
2961
2962 if (data_len < 2 || data_in[0] != '&')
2963 return;
2964
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002965 if (PyErr_Occurred())
2966 return;
2967
Neal Norwitz0269b912007-08-08 06:56:02 +00002968 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 if (!key)
2970 return;
2971
2972 value = PyDict_GetItem(self->entity, key);
2973
2974 if (value) {
2975 if (TreeBuilder_CheckExact(self->target))
2976 res = treebuilder_handle_data(
2977 (TreeBuilderObject*) self->target, value
2978 );
2979 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002980 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 else
2982 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002984 } else if (!PyErr_Occurred()) {
2985 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002986 char message[128] = "undefined entity ";
2987 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002988 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002989 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002991 EXPAT(GetErrorColumnNumber)(self->parser),
2992 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993 );
2994 }
2995
2996 Py_DECREF(key);
2997}
2998
2999static void
3000expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3001 const XML_Char **attrib_in)
3002{
3003 PyObject* res;
3004 PyObject* tag;
3005 PyObject* attrib;
3006 int ok;
3007
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003008 if (PyErr_Occurred())
3009 return;
3010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003011 /* tag name */
3012 tag = makeuniversal(self, tag_in);
3013 if (!tag)
3014 return; /* parser will look for errors */
3015
3016 /* attributes */
3017 if (attrib_in[0]) {
3018 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003019 if (!attrib) {
3020 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003022 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003023 while (attrib_in[0] && attrib_in[1]) {
3024 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003025 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 if (!key || !value) {
3027 Py_XDECREF(value);
3028 Py_XDECREF(key);
3029 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003030 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 return;
3032 }
3033 ok = PyDict_SetItem(attrib, key, value);
3034 Py_DECREF(value);
3035 Py_DECREF(key);
3036 if (ok < 0) {
3037 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003038 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 return;
3040 }
3041 attrib_in += 2;
3042 }
3043 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003044 Py_INCREF(Py_None);
3045 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003046 }
3047
3048 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003049 /* shortcut */
3050 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3051 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003052 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003053 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003054 if (attrib == Py_None) {
3055 Py_DECREF(attrib);
3056 attrib = PyDict_New();
3057 if (!attrib) {
3058 Py_DECREF(tag);
3059 return;
3060 }
3061 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003062 res = PyObject_CallFunctionObjArgs(self->handle_start,
3063 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003064 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065 res = NULL;
3066
3067 Py_DECREF(tag);
3068 Py_DECREF(attrib);
3069
3070 Py_XDECREF(res);
3071}
3072
3073static void
3074expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3075 int data_len)
3076{
3077 PyObject* data;
3078 PyObject* res;
3079
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003080 if (PyErr_Occurred())
3081 return;
3082
Neal Norwitz0269b912007-08-08 06:56:02 +00003083 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003084 if (!data)
3085 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086
3087 if (TreeBuilder_CheckExact(self->target))
3088 /* shortcut */
3089 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3090 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003091 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092 else
3093 res = NULL;
3094
3095 Py_DECREF(data);
3096
3097 Py_XDECREF(res);
3098}
3099
3100static void
3101expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3102{
3103 PyObject* tag;
3104 PyObject* res = NULL;
3105
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003106 if (PyErr_Occurred())
3107 return;
3108
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109 if (TreeBuilder_CheckExact(self->target))
3110 /* shortcut */
3111 /* the standard tree builder doesn't look at the end tag */
3112 res = treebuilder_handle_end(
3113 (TreeBuilderObject*) self->target, Py_None
3114 );
3115 else if (self->handle_end) {
3116 tag = makeuniversal(self, tag_in);
3117 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003118 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119 Py_DECREF(tag);
3120 }
3121 }
3122
3123 Py_XDECREF(res);
3124}
3125
3126static void
3127expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3128 const XML_Char *uri)
3129{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003130 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3131 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003132
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003133 if (PyErr_Occurred())
3134 return;
3135
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003136 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003137 return;
3138
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003139 if (!uri)
3140 uri = "";
3141 if (!prefix)
3142 prefix = "";
3143
3144 parcel = Py_BuildValue("ss", prefix, uri);
3145 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003146 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003147 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3148 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003149}
3150
3151static void
3152expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3153{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003154 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3155
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003156 if (PyErr_Occurred())
3157 return;
3158
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003159 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003160 return;
3161
3162 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003163}
3164
3165static void
3166expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3167{
3168 PyObject* comment;
3169 PyObject* res;
3170
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003171 if (PyErr_Occurred())
3172 return;
3173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003175 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003176 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003177 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3178 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179 Py_XDECREF(res);
3180 Py_DECREF(comment);
3181 }
3182 }
3183}
3184
Eli Bendersky45839902013-01-13 05:14:47 -08003185static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003186expat_start_doctype_handler(XMLParserObject *self,
3187 const XML_Char *doctype_name,
3188 const XML_Char *sysid,
3189 const XML_Char *pubid,
3190 int has_internal_subset)
3191{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003192 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003193 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003194 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003195
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003196 if (PyErr_Occurred())
3197 return;
3198
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003199 doctype_name_obj = makeuniversal(self, doctype_name);
3200 if (!doctype_name_obj)
3201 return;
3202
3203 if (sysid) {
3204 sysid_obj = makeuniversal(self, sysid);
3205 if (!sysid_obj) {
3206 Py_DECREF(doctype_name_obj);
3207 return;
3208 }
3209 } else {
3210 Py_INCREF(Py_None);
3211 sysid_obj = Py_None;
3212 }
3213
3214 if (pubid) {
3215 pubid_obj = makeuniversal(self, pubid);
3216 if (!pubid_obj) {
3217 Py_DECREF(doctype_name_obj);
3218 Py_DECREF(sysid_obj);
3219 return;
3220 }
3221 } else {
3222 Py_INCREF(Py_None);
3223 pubid_obj = Py_None;
3224 }
3225
3226 /* If the target has a handler for doctype, call it. */
3227 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003228 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3229 doctype_name_obj, pubid_obj,
3230 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003231 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003232 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003233 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3234 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3235 "The doctype() method of XMLParser is ignored. "
3236 "Define doctype() method on the TreeBuilder target.",
3237 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003238 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003239 }
3240
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003241 Py_DECREF(doctype_name_obj);
3242 Py_DECREF(pubid_obj);
3243 Py_DECREF(sysid_obj);
3244}
3245
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246static void
3247expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3248 const XML_Char* data_in)
3249{
3250 PyObject* target;
3251 PyObject* data;
3252 PyObject* res;
3253
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003254 if (PyErr_Occurred())
3255 return;
3256
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003258 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3259 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003261 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3262 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 Py_XDECREF(res);
3264 Py_DECREF(data);
3265 Py_DECREF(target);
3266 } else {
3267 Py_XDECREF(data);
3268 Py_XDECREF(target);
3269 }
3270 }
3271}
3272
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274
Eli Bendersky52467b12012-06-01 07:13:08 +03003275static PyObject *
3276xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277{
Eli Bendersky52467b12012-06-01 07:13:08 +03003278 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3279 if (self) {
3280 self->parser = NULL;
3281 self->target = self->entity = self->names = NULL;
3282 self->handle_start = self->handle_data = self->handle_end = NULL;
3283 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003284 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003286 return (PyObject *)self;
3287}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288
scoderc8d8e152017-09-14 22:00:03 +02003289static int
3290ignore_attribute_error(PyObject *value)
3291{
3292 if (value == NULL) {
3293 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3294 return -1;
3295 }
3296 PyErr_Clear();
3297 }
3298 return 0;
3299}
3300
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301/*[clinic input]
3302_elementtree.XMLParser.__init__
3303
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003304 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003305 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003306 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003307
3308[clinic start generated code]*/
3309
Eli Bendersky52467b12012-06-01 07:13:08 +03003310static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003311_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3312 const char *encoding)
3313/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003314{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003315 self->entity = PyDict_New();
3316 if (!self->entity)
3317 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003318
Serhiy Storchakacb985562015-05-04 15:32:48 +03003319 self->names = PyDict_New();
3320 if (!self->names) {
3321 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003322 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003324
Serhiy Storchakacb985562015-05-04 15:32:48 +03003325 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3326 if (!self->parser) {
3327 Py_CLEAR(self->entity);
3328 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003330 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003332 /* expat < 2.1.0 has no XML_SetHashSalt() */
3333 if (EXPAT(SetHashSalt) != NULL) {
3334 EXPAT(SetHashSalt)(self->parser,
3335 (unsigned long)_Py_HashSecret.expat.hashsalt);
3336 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337
Eli Bendersky52467b12012-06-01 07:13:08 +03003338 if (target) {
3339 Py_INCREF(target);
3340 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003341 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003343 Py_CLEAR(self->entity);
3344 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003345 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003347 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003348 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003349
Serhiy Storchakacb985562015-05-04 15:32:48 +03003350 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003351 if (ignore_attribute_error(self->handle_start)) {
3352 return -1;
3353 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003354 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003355 if (ignore_attribute_error(self->handle_data)) {
3356 return -1;
3357 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003358 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003359 if (ignore_attribute_error(self->handle_end)) {
3360 return -1;
3361 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003362 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003363 if (ignore_attribute_error(self->handle_comment)) {
3364 return -1;
3365 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003366 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003367 if (ignore_attribute_error(self->handle_pi)) {
3368 return -1;
3369 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003370 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003371 if (ignore_attribute_error(self->handle_close)) {
3372 return -1;
3373 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003374 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003375 if (ignore_attribute_error(self->handle_doctype)) {
3376 return -1;
3377 }
Eli Bendersky45839902013-01-13 05:14:47 -08003378
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003380 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003381 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003382 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383 (XML_StartElementHandler) expat_start_handler,
3384 (XML_EndElementHandler) expat_end_handler
3385 );
3386 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003387 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388 (XML_DefaultHandler) expat_default_handler
3389 );
3390 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003391 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 (XML_CharacterDataHandler) expat_data_handler
3393 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003394 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003395 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003396 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 (XML_CommentHandler) expat_comment_handler
3398 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003399 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003401 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 (XML_ProcessingInstructionHandler) expat_pi_handler
3403 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003404 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003405 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003406 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3407 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003409 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003410 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003412
Eli Bendersky52467b12012-06-01 07:13:08 +03003413 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003414}
3415
Eli Bendersky52467b12012-06-01 07:13:08 +03003416static int
3417xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3418{
3419 Py_VISIT(self->handle_close);
3420 Py_VISIT(self->handle_pi);
3421 Py_VISIT(self->handle_comment);
3422 Py_VISIT(self->handle_end);
3423 Py_VISIT(self->handle_data);
3424 Py_VISIT(self->handle_start);
3425
3426 Py_VISIT(self->target);
3427 Py_VISIT(self->entity);
3428 Py_VISIT(self->names);
3429
3430 return 0;
3431}
3432
3433static int
3434xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003435{
Victor Stinnere727d412017-09-18 05:29:37 -07003436 if (self->parser != NULL) {
3437 XML_Parser parser = self->parser;
3438 self->parser = NULL;
3439 EXPAT(ParserFree)(parser);
3440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003441
Antoine Pitrouc1948842012-10-01 23:40:37 +02003442 Py_CLEAR(self->handle_close);
3443 Py_CLEAR(self->handle_pi);
3444 Py_CLEAR(self->handle_comment);
3445 Py_CLEAR(self->handle_end);
3446 Py_CLEAR(self->handle_data);
3447 Py_CLEAR(self->handle_start);
3448 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003449
Antoine Pitrouc1948842012-10-01 23:40:37 +02003450 Py_CLEAR(self->target);
3451 Py_CLEAR(self->entity);
3452 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003453
Eli Bendersky52467b12012-06-01 07:13:08 +03003454 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003455}
3456
Eli Bendersky52467b12012-06-01 07:13:08 +03003457static void
3458xmlparser_dealloc(XMLParserObject* self)
3459{
3460 PyObject_GC_UnTrack(self);
3461 xmlparser_gc_clear(self);
3462 Py_TYPE(self)->tp_free((PyObject *)self);
3463}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464
3465LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003466expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003467{
3468 int ok;
3469
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003470 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3472
3473 if (PyErr_Occurred())
3474 return NULL;
3475
3476 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003477 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003478 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003480 EXPAT(GetErrorColumnNumber)(self->parser),
3481 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 );
3483 return NULL;
3484 }
3485
3486 Py_RETURN_NONE;
3487}
3488
Serhiy Storchakacb985562015-05-04 15:32:48 +03003489/*[clinic input]
3490_elementtree.XMLParser.close
3491
3492[clinic start generated code]*/
3493
3494static PyObject *
3495_elementtree_XMLParser_close_impl(XMLParserObject *self)
3496/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003497{
3498 /* end feeding data to parser */
3499
3500 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003502 if (!res)
3503 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003504
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003505 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003506 Py_DECREF(res);
3507 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003508 }
3509 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003510 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003511 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003512 }
3513 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003514 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003515 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003516}
3517
Serhiy Storchakacb985562015-05-04 15:32:48 +03003518/*[clinic input]
3519_elementtree.XMLParser.feed
3520
3521 data: object
3522 /
3523
3524[clinic start generated code]*/
3525
3526static PyObject *
3527_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3528/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003529{
3530 /* feed data to parser */
3531
Serhiy Storchakacb985562015-05-04 15:32:48 +03003532 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003533 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003534 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3535 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003536 return NULL;
3537 if (data_len > INT_MAX) {
3538 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3539 return NULL;
3540 }
3541 /* Explicitly set UTF-8 encoding. Return code ignored. */
3542 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003543 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003544 }
3545 else {
3546 Py_buffer view;
3547 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003548 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003549 return NULL;
3550 if (view.len > INT_MAX) {
3551 PyBuffer_Release(&view);
3552 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3553 return NULL;
3554 }
3555 res = expat_parse(self, view.buf, (int)view.len, 0);
3556 PyBuffer_Release(&view);
3557 return res;
3558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003559}
3560
Serhiy Storchakacb985562015-05-04 15:32:48 +03003561/*[clinic input]
3562_elementtree.XMLParser._parse_whole
3563
3564 file: object
3565 /
3566
3567[clinic start generated code]*/
3568
3569static PyObject *
3570_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3571/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003572{
Eli Benderskya3699232013-05-19 18:47:23 -07003573 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003574 PyObject* reader;
3575 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003576 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577 PyObject* res;
3578
Serhiy Storchakacb985562015-05-04 15:32:48 +03003579 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580 if (!reader)
3581 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003582
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583 /* read from open file object */
3584 for (;;) {
3585
3586 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3587
3588 if (!buffer) {
3589 /* read failed (e.g. due to KeyboardInterrupt) */
3590 Py_DECREF(reader);
3591 return NULL;
3592 }
3593
Eli Benderskyf996e772012-03-16 05:53:30 +02003594 if (PyUnicode_CheckExact(buffer)) {
3595 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003596 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003597 Py_DECREF(buffer);
3598 break;
3599 }
3600 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003601 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003602 if (!temp) {
3603 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003604 Py_DECREF(reader);
3605 return NULL;
3606 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003607 buffer = temp;
3608 }
3609 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610 Py_DECREF(buffer);
3611 break;
3612 }
3613
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003614 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3615 Py_DECREF(buffer);
3616 Py_DECREF(reader);
3617 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3618 return NULL;
3619 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003620 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003621 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003622 );
3623
3624 Py_DECREF(buffer);
3625
3626 if (!res) {
3627 Py_DECREF(reader);
3628 return NULL;
3629 }
3630 Py_DECREF(res);
3631
3632 }
3633
3634 Py_DECREF(reader);
3635
3636 res = expat_parse(self, "", 0, 1);
3637
3638 if (res && TreeBuilder_CheckExact(self->target)) {
3639 Py_DECREF(res);
3640 return treebuilder_done((TreeBuilderObject*) self->target);
3641 }
3642
3643 return res;
3644}
3645
Serhiy Storchakacb985562015-05-04 15:32:48 +03003646/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003647_elementtree.XMLParser._setevents
3648
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003649 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003650 events_to_report: object = None
3651 /
3652
3653[clinic start generated code]*/
3654
3655static PyObject *
3656_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3657 PyObject *events_queue,
3658 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003659/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003660{
3661 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003662 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003664 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665
3666 if (!TreeBuilder_CheckExact(self->target)) {
3667 PyErr_SetString(
3668 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003669 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003670 "targets"
3671 );
3672 return NULL;
3673 }
3674
3675 target = (TreeBuilderObject*) self->target;
3676
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003677 events_append = PyObject_GetAttrString(events_queue, "append");
3678 if (events_append == NULL)
3679 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003680 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681
3682 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003683 Py_CLEAR(target->start_event_obj);
3684 Py_CLEAR(target->end_event_obj);
3685 Py_CLEAR(target->start_ns_event_obj);
3686 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003688 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003689 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003690 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003691 Py_RETURN_NONE;
3692 }
3693
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003694 if (!(events_seq = PySequence_Fast(events_to_report,
3695 "events must be a sequence"))) {
3696 return NULL;
3697 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003698
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003699 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003700 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003701 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003702 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003703 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003704 } else if (PyBytes_Check(event_name_obj)) {
3705 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003706 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003707 if (event_name == NULL) {
3708 Py_DECREF(events_seq);
3709 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3710 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003711 }
3712
3713 Py_INCREF(event_name_obj);
3714 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003715 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003716 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003717 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003718 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003719 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003720 EXPAT(SetNamespaceDeclHandler)(
3721 self->parser,
3722 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3723 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3724 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003725 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003726 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003727 EXPAT(SetNamespaceDeclHandler)(
3728 self->parser,
3729 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3730 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3731 );
3732 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003733 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003734 Py_DECREF(events_seq);
3735 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003736 return NULL;
3737 }
3738 }
3739
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003740 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003741 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003742}
3743
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003744static PyMemberDef xmlparser_members[] = {
3745 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
3746 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
3747 {NULL}
3748};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003749
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003750static PyObject*
3751xmlparser_version_getter(XMLParserObject *self, void *closure)
3752{
3753 return PyUnicode_FromFormat(
3754 "Expat %d.%d.%d", XML_MAJOR_VERSION,
3755 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003756}
3757
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003758static PyGetSetDef xmlparser_getsetlist[] = {
3759 {"version", (getter)xmlparser_version_getter, NULL, NULL},
3760 {NULL},
3761};
3762
Serhiy Storchakacb985562015-05-04 15:32:48 +03003763#include "clinic/_elementtree.c.h"
3764
3765static PyMethodDef element_methods[] = {
3766
3767 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3768
3769 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3770 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3771
3772 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3773 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3774 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3775
3776 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3777 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3778 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3779 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3780
3781 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3782 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3783 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3784
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003785 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003786 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3787
3788 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3789 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3790
3791 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3792
3793 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3794 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3795 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3796 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3797 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3798
3799 {NULL, NULL}
3800};
3801
3802static PyMappingMethods element_as_mapping = {
3803 (lenfunc) element_length,
3804 (binaryfunc) element_subscr,
3805 (objobjargproc) element_ass_subscr,
3806};
3807
Serhiy Storchakadde08152015-11-25 15:28:13 +02003808static PyGetSetDef element_getsetlist[] = {
3809 {"tag",
3810 (getter)element_tag_getter,
3811 (setter)element_tag_setter,
3812 "A string identifying what kind of data this element represents"},
3813 {"text",
3814 (getter)element_text_getter,
3815 (setter)element_text_setter,
3816 "A string of text directly after the start tag, or None"},
3817 {"tail",
3818 (getter)element_tail_getter,
3819 (setter)element_tail_setter,
3820 "A string of text directly after the end tag, or None"},
3821 {"attrib",
3822 (getter)element_attrib_getter,
3823 (setter)element_attrib_setter,
3824 "A dictionary containing the element's attributes"},
3825 {NULL},
3826};
3827
Serhiy Storchakacb985562015-05-04 15:32:48 +03003828static PyTypeObject Element_Type = {
3829 PyVarObject_HEAD_INIT(NULL, 0)
3830 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3831 /* methods */
3832 (destructor)element_dealloc, /* tp_dealloc */
3833 0, /* tp_print */
3834 0, /* tp_getattr */
3835 0, /* tp_setattr */
3836 0, /* tp_reserved */
3837 (reprfunc)element_repr, /* tp_repr */
3838 0, /* tp_as_number */
3839 &element_as_sequence, /* tp_as_sequence */
3840 &element_as_mapping, /* tp_as_mapping */
3841 0, /* tp_hash */
3842 0, /* tp_call */
3843 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003844 PyObject_GenericGetAttr, /* tp_getattro */
3845 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003846 0, /* tp_as_buffer */
3847 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3848 /* tp_flags */
3849 0, /* tp_doc */
3850 (traverseproc)element_gc_traverse, /* tp_traverse */
3851 (inquiry)element_gc_clear, /* tp_clear */
3852 0, /* tp_richcompare */
3853 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3854 0, /* tp_iter */
3855 0, /* tp_iternext */
3856 element_methods, /* tp_methods */
3857 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003858 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003859 0, /* tp_base */
3860 0, /* tp_dict */
3861 0, /* tp_descr_get */
3862 0, /* tp_descr_set */
3863 0, /* tp_dictoffset */
3864 (initproc)element_init, /* tp_init */
3865 PyType_GenericAlloc, /* tp_alloc */
3866 element_new, /* tp_new */
3867 0, /* tp_free */
3868};
3869
3870static PyMethodDef treebuilder_methods[] = {
3871 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3872 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3873 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3874 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3875 {NULL, NULL}
3876};
3877
3878static PyTypeObject TreeBuilder_Type = {
3879 PyVarObject_HEAD_INIT(NULL, 0)
3880 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3881 /* methods */
3882 (destructor)treebuilder_dealloc, /* tp_dealloc */
3883 0, /* tp_print */
3884 0, /* tp_getattr */
3885 0, /* tp_setattr */
3886 0, /* tp_reserved */
3887 0, /* tp_repr */
3888 0, /* tp_as_number */
3889 0, /* tp_as_sequence */
3890 0, /* tp_as_mapping */
3891 0, /* tp_hash */
3892 0, /* tp_call */
3893 0, /* tp_str */
3894 0, /* tp_getattro */
3895 0, /* tp_setattro */
3896 0, /* tp_as_buffer */
3897 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3898 /* tp_flags */
3899 0, /* tp_doc */
3900 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3901 (inquiry)treebuilder_gc_clear, /* tp_clear */
3902 0, /* tp_richcompare */
3903 0, /* tp_weaklistoffset */
3904 0, /* tp_iter */
3905 0, /* tp_iternext */
3906 treebuilder_methods, /* tp_methods */
3907 0, /* tp_members */
3908 0, /* tp_getset */
3909 0, /* tp_base */
3910 0, /* tp_dict */
3911 0, /* tp_descr_get */
3912 0, /* tp_descr_set */
3913 0, /* tp_dictoffset */
3914 _elementtree_TreeBuilder___init__, /* tp_init */
3915 PyType_GenericAlloc, /* tp_alloc */
3916 treebuilder_new, /* tp_new */
3917 0, /* tp_free */
3918};
3919
3920static PyMethodDef xmlparser_methods[] = {
3921 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3922 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3923 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3924 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003925 {NULL, NULL}
3926};
3927
Neal Norwitz227b5332006-03-22 09:28:35 +00003928static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003929 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003930 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003931 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003932 (destructor)xmlparser_dealloc, /* tp_dealloc */
3933 0, /* tp_print */
3934 0, /* tp_getattr */
3935 0, /* tp_setattr */
3936 0, /* tp_reserved */
3937 0, /* tp_repr */
3938 0, /* tp_as_number */
3939 0, /* tp_as_sequence */
3940 0, /* tp_as_mapping */
3941 0, /* tp_hash */
3942 0, /* tp_call */
3943 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003944 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03003945 0, /* tp_setattro */
3946 0, /* tp_as_buffer */
3947 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3948 /* tp_flags */
3949 0, /* tp_doc */
3950 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3951 (inquiry)xmlparser_gc_clear, /* tp_clear */
3952 0, /* tp_richcompare */
3953 0, /* tp_weaklistoffset */
3954 0, /* tp_iter */
3955 0, /* tp_iternext */
3956 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03003957 xmlparser_members, /* tp_members */
3958 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03003959 0, /* tp_base */
3960 0, /* tp_dict */
3961 0, /* tp_descr_get */
3962 0, /* tp_descr_set */
3963 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003964 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003965 PyType_GenericAlloc, /* tp_alloc */
3966 xmlparser_new, /* tp_new */
3967 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003968};
3969
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003970/* ==================================================================== */
3971/* python module interface */
3972
3973static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02003974 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003975 {NULL, NULL}
3976};
3977
Martin v. Löwis1a214512008-06-11 05:26:20 +00003978
Eli Bendersky532d03e2013-08-10 08:00:39 -07003979static struct PyModuleDef elementtreemodule = {
3980 PyModuleDef_HEAD_INIT,
3981 "_elementtree",
3982 NULL,
3983 sizeof(elementtreestate),
3984 _functions,
3985 NULL,
3986 elementtree_traverse,
3987 elementtree_clear,
3988 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003989};
3990
Neal Norwitzf6657e62006-12-28 04:47:50 +00003991PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003992PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003993{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003994 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003995 elementtreestate *st;
3996
3997 m = PyState_FindModule(&elementtreemodule);
3998 if (m) {
3999 Py_INCREF(m);
4000 return m;
4001 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004002
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004003 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004004 if (PyType_Ready(&ElementIter_Type) < 0)
4005 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004006 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004007 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004008 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004009 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004010 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004011 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004012
Eli Bendersky532d03e2013-08-10 08:00:39 -07004013 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004014 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004015 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004016 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004017
Eli Bendersky828efde2012-04-05 05:40:58 +03004018 if (!(temp = PyImport_ImportModule("copy")))
4019 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004020 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004021 Py_XDECREF(temp);
4022
Victor Stinnerb136f112017-07-10 22:28:02 +02004023 if (st->deepcopy_obj == NULL) {
4024 return NULL;
4025 }
4026
4027 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004028 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004029 return NULL;
4030
Eli Bendersky20d41742012-06-01 09:48:37 +03004031 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004032 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4033 if (expat_capi) {
4034 /* check that it's usable */
4035 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004036 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004037 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4038 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004039 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004040 PyErr_SetString(PyExc_ImportError,
4041 "pyexpat version is incompatible");
4042 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004043 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004044 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004045 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004046 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004047
Eli Bendersky532d03e2013-08-10 08:00:39 -07004048 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004049 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004050 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004051 Py_INCREF(st->parseerror_obj);
4052 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004053
Eli Bendersky092af1f2012-03-04 07:14:03 +02004054 Py_INCREF((PyObject *)&Element_Type);
4055 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4056
Eli Bendersky58d548d2012-05-29 15:45:16 +03004057 Py_INCREF((PyObject *)&TreeBuilder_Type);
4058 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4059
Eli Bendersky52467b12012-06-01 07:13:08 +03004060 Py_INCREF((PyObject *)&XMLParser_Type);
4061 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004062
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004063 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004064}