blob: 5481c61678712b72eb089f42f4c96ea33a322c65 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  The first branch is compiled out by
   "#if 0"; when enabled it keeps a running byte count in "memory" and
   prints it together with the given comment on every ALLOC/RELEASE.
   In the active branch both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local function that
   returns "type".  With MSVC the function is additionally declared
   __inline and __fastcall; everywhere else it is plain static. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Helpers for the 'join' flag that is stored in the lowest bit of the
   text and tail object pointers.  Every use of text or tail as an
   object pointer has to go through JOIN_OBJ; see the comments in the
   ElementObject definition for details.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p combined with "flag" */
#define JOIN_OBJ(p) ((PyObject *)(((uintptr_t)(p)) & ~(uintptr_t)1))
#define JOIN_GET(p) (((uintptr_t)(p)) & 1)
#define JOIN_SET(p, flag) ((void *)(((uintptr_t)(JOIN_OBJ(p))) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
/* Types defined by this extension.  These are forward declarations
   only; the type objects are defined elsewhere in the file. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
88
89
/* Per-module state; PEP 3121.
   Every member is an object reference that elementtree_traverse visits
   and elementtree_clear releases.
   NOTE(review): the meaning of each member is inferred from its name
   only; confirm against the module initialisation code. */
typedef struct {
    PyObject *parseerror_obj;   /* presumably the ParseError exception */
    PyObject *deepcopy_obj;     /* presumably copy.deepcopy */
    PyObject *elementpath_obj;  /* presumably the ElementPath module */
    PyObject *comment_factory;
    PyObject *pi_factory;
} elementtreestate;
98
/* Forward declaration; ET_STATE_GLOBAL needs the address of the module
   definition. */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
111
112static int
113elementtree_clear(PyObject *m)
114{
115 elementtreestate *st = ET_STATE(m);
116 Py_CLEAR(st->parseerror_obj);
117 Py_CLEAR(st->deepcopy_obj);
118 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200119 Py_CLEAR(st->comment_factory);
120 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700121 return 0;
122}
123
124static int
125elementtree_traverse(PyObject *m, visitproc visit, void *arg)
126{
127 elementtreestate *st = ET_STATE(m);
128 Py_VISIT(st->parseerror_obj);
129 Py_VISIT(st->deepcopy_obj);
130 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200131 Py_VISIT(st->comment_factory);
132 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700133 return 0;
134}
135
136static void
137elementtree_free(void *m)
138{
139 elementtree_clear((PyObject *)m);
140}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000141
142/* helpers */
143
144LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145list_join(PyObject* list)
146{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300147 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000148 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 PyObject* result;
150
Antoine Pitrouc1948842012-10-01 23:40:37 +0200151 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 if (!joiner)
153 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200154 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000156 return result;
157}
158
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159/* Is the given object an empty dictionary?
160*/
161static int
162is_empty_dict(PyObject *obj)
163{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200164 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165}
166
167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200169/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000170
/* Separately allocated part of an element: its attributes and children.
   It is created by create_extra and released by dealloc_extra. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage for the first STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
186
/* Instance layout of the Element type. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child. note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer. the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent. note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra allocates it */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000211
/* Element_CheckExact: op's type is exactly Element_Type.
   Element_Check: op passes PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215
216/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200217/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000218
219LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200220create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221{
222 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200223 if (!self->extra) {
224 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000225 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200226 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228 if (!attrib)
229 attrib = Py_None;
230
231 Py_INCREF(attrib);
232 self->extra->attrib = attrib;
233
234 self->extra->length = 0;
235 self->extra->allocated = STATIC_CHILDREN;
236 self->extra->children = self->extra->_children;
237
238 return 0;
239}
240
241LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300242dealloc_extra(ElementObjectExtra *extra)
243{
244 Py_ssize_t i;
245
246 if (!extra)
247 return;
248
249 Py_DECREF(extra->attrib);
250
251 for (i = 0; i < extra->length; i++)
252 Py_DECREF(extra->children[i]);
253
254 if (extra->children != extra->_children)
255 PyObject_Free(extra->children);
256
257 PyObject_Free(extra);
258}
259
260LOCAL(void)
261clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262{
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300264
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 if (!self->extra)
266 return;
267
268 /* Avoid DECREFs calling into this code again (cycles, etc.)
269 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300270 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 self->extra = NULL;
272
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300273 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000274}
275
Eli Bendersky092af1f2012-03-04 07:14:03 +0200276/* Convenience internal function to create new Element objects with the given
277 * tag and attributes.
278*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200280create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281{
282 ElementObject* self;
283
Eli Bendersky0192ba32012-03-30 16:38:33 +0300284 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 if (self == NULL)
286 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 self->extra = NULL;
288
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000289 Py_INCREF(tag);
290 self->tag = tag;
291
292 Py_INCREF(Py_None);
293 self->text = Py_None;
294
295 Py_INCREF(Py_None);
296 self->tail = Py_None;
297
Eli Benderskyebf37a22012-04-03 22:02:37 +0300298 self->weakreflist = NULL;
299
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200300 ALLOC(sizeof(ElementObject), "create element");
301 PyObject_GC_Track(self);
302
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200303 if (attrib != Py_None && !is_empty_dict(attrib)) {
304 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200305 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200306 return NULL;
307 }
308 }
309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000310 return (PyObject*) self;
311}
312
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313static PyObject *
314element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
315{
316 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
317 if (e != NULL) {
318 Py_INCREF(Py_None);
319 e->tag = Py_None;
320
321 Py_INCREF(Py_None);
322 e->text = Py_None;
323
324 Py_INCREF(Py_None);
325 e->tail = Py_None;
326
327 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300328 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200329 }
330 return (PyObject *)e;
331}
332
Eli Bendersky737b1732012-05-29 06:02:56 +0300333/* Helper function for extracting the attrib dictionary from a keywords dict.
334 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800335 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700337 *
338 * Return a dictionary with the content of kwds merged into the content of
339 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300340 */
341static PyObject*
342get_attrib_from_keywords(PyObject *kwds)
343{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700344 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600345 if (attrib_str == NULL) {
346 return NULL;
347 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200348 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300349
350 if (attrib) {
351 /* If attrib was found in kwds, copy its value and remove it from
352 * kwds
353 */
354 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
357 Py_TYPE(attrib)->tp_name);
358 return NULL;
359 }
360 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200361 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
362 Py_DECREF(attrib);
363 attrib = NULL;
364 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200365 }
366 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300367 attrib = PyDict_New();
368 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700369
370 Py_DECREF(attrib_str);
371
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600372 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
373 Py_DECREF(attrib);
374 return NULL;
375 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return attrib;
377}
378
Serhiy Storchakacb985562015-05-04 15:32:48 +0300379/*[clinic input]
380module _elementtree
381class _elementtree.Element "ElementObject *" "&Element_Type"
382class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
383class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
384[clinic start generated code]*/
385/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
386
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387static int
388element_init(PyObject *self, PyObject *args, PyObject *kwds)
389{
390 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 PyObject *attrib = NULL;
392 ElementObject *self_elem;
393
394 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
395 return -1;
396
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (attrib) {
398 /* attrib passed as positional arg */
399 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 if (!attrib)
401 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300402 if (kwds) {
403 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 return -1;
406 }
407 }
408 } else if (kwds) {
409 /* have keywords args */
410 attrib = get_attrib_from_keywords(kwds);
411 if (!attrib)
412 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 }
414
415 self_elem = (ElementObject *)self;
416
Antoine Pitrouc1948842012-10-01 23:40:37 +0200417 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200419 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420 return -1;
421 }
422 }
423
Eli Bendersky48d358b2012-05-30 17:57:50 +0300424 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200428 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300429 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200430
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300432 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
437 return 0;
438}
439
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200441element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200443 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 PyObject* *children;
445
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300446 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300496LOCAL(void)
497raise_type_error(PyObject *element)
498{
499 PyErr_Format(PyExc_TypeError,
500 "expected an Element, not \"%.200s\"",
501 Py_TYPE(element)->tp_name);
502}
503
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000504LOCAL(int)
505element_add_subelement(ElementObject* self, PyObject* element)
506{
507 /* add a child element to a parent */
508
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300509 if (!Element_Check(element)) {
510 raise_type_error(element);
511 return -1;
512 }
513
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514 if (element_resize(self, 1) < 0)
515 return -1;
516
517 Py_INCREF(element);
518 self->extra->children[self->extra->length] = element;
519
520 self->extra->length++;
521
522 return 0;
523}
524
525LOCAL(PyObject*)
526element_get_attrib(ElementObject* self)
527{
528 /* return borrowed reference to attrib dictionary */
529 /* note: this function assumes that the extra section exists */
530
531 PyObject* res = self->extra->attrib;
532
533 if (res == Py_None) {
534 /* create missing dictionary */
535 res = PyDict_New();
536 if (!res)
537 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200538 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 self->extra->attrib = res;
540 }
541
542 return res;
543}
544
545LOCAL(PyObject*)
546element_get_text(ElementObject* self)
547{
548 /* return borrowed reference to text attribute */
549
Serhiy Storchaka576def02017-03-30 09:47:31 +0300550 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000551
552 if (JOIN_GET(res)) {
553 res = JOIN_OBJ(res);
554 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300555 PyObject *tmp = list_join(res);
556 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300558 self->text = tmp;
559 Py_DECREF(res);
560 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561 }
562 }
563
564 return res;
565}
566
567LOCAL(PyObject*)
568element_get_tail(ElementObject* self)
569{
570 /* return borrowed reference to text attribute */
571
Serhiy Storchaka576def02017-03-30 09:47:31 +0300572 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000573
574 if (JOIN_GET(res)) {
575 res = JOIN_OBJ(res);
576 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300577 PyObject *tmp = list_join(res);
578 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300580 self->tail = tmp;
581 Py_DECREF(res);
582 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 }
584 }
585
586 return res;
587}
588
589static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300590subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591{
592 PyObject* elem;
593
594 ElementObject* parent;
595 PyObject* tag;
596 PyObject* attrib = NULL;
597 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
598 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800599 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000600 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000602
Eli Bendersky737b1732012-05-29 06:02:56 +0300603 if (attrib) {
604 /* attrib passed as positional arg */
605 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606 if (!attrib)
607 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600608 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
609 Py_DECREF(attrib);
610 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300611 }
612 } else if (kwds) {
613 /* have keyword args */
614 attrib = get_attrib_from_keywords(kwds);
615 if (!attrib)
616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000617 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300618 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000619 Py_INCREF(Py_None);
620 attrib = Py_None;
621 }
622
Eli Bendersky092af1f2012-03-04 07:14:03 +0200623 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200625 if (elem == NULL)
626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 if (element_add_subelement(parent, elem) < 0) {
629 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000630 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000631 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000632
633 return elem;
634}
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636static int
637element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
638{
639 Py_VISIT(self->tag);
640 Py_VISIT(JOIN_OBJ(self->text));
641 Py_VISIT(JOIN_OBJ(self->tail));
642
643 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200644 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300645 Py_VISIT(self->extra->attrib);
646
647 for (i = 0; i < self->extra->length; ++i)
648 Py_VISIT(self->extra->children[i]);
649 }
650 return 0;
651}
652
/* tp_clear for Element: drop the references to tag, text and tail and
   release the extra block (attrib and children).  Always returns 0. */
static int
element_gc_clear(ElementObject *self)
{
    Py_CLEAR(self->tag);
    /* text and tail are join-flagged pointers, so plain Py_CLEAR cannot
       be used on them */
    _clear_joined_ptr(&self->text);
    _clear_joined_ptr(&self->tail);

    /* After dropping all references from extra, it's no longer valid anyway,
     * so fully deallocate it.
    */
    clear_extra(self);
    return 0;
}
666
/* tp_dealloc for Element: untrack the object, clear weak references,
   release everything the element owns and free it through tp_free.
   The body runs between the Py_TRASHCAN_SAFE_BEGIN/END macros. */
static void
element_dealloc(ElementObject* self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_SAFE_BEGIN(self)

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    RELEASE(sizeof(ElementObject), "destroy element");
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_SAFE_END(self)
}
685
686/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000690
Serhiy Storchakacb985562015-05-04 15:32:48 +0300691 subelement: object(subclass_of='&Element_Type')
692 /
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
698/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
699{
700 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 return NULL;
702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element_clear_impl(ElementObject *self)
713/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
714{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300715 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300721 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
723 Py_RETURN_NONE;
724}
725
Serhiy Storchakacb985562015-05-04 15:32:48 +0300726/*[clinic input]
727_elementtree.Element.__copy__
728
729[clinic start generated code]*/
730
731static PyObject *
732_elementtree_Element___copy___impl(ElementObject *self)
733/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000734{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200735 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000736 ElementObject* element;
737
Eli Bendersky092af1f2012-03-04 07:14:03 +0200738 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800739 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 if (!element)
741 return NULL;
742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->text));
744 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Oren Milman39ecb9c2017-10-10 23:26:24 +0300746 Py_INCREF(JOIN_OBJ(self->tail));
747 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300749 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 if (element_resize(element, self->extra->length) < 0) {
752 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000755
756 for (i = 0; i < self->extra->length; i++) {
757 Py_INCREF(self->extra->children[i]);
758 element->extra->children[i] = self->extra->children[i];
759 }
760
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300761 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000763 }
764
765 return (PyObject*) element;
766}
767
/* Helper for a deep copy.  Forward declaration only; it is called as
   deepcopy(object, memo) and a NULL result is treated as failure by
   the callers in this file. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
770
Serhiy Storchakacb985562015-05-04 15:32:48 +0300771/*[clinic input]
772_elementtree.Element.__deepcopy__
773
Oren Milmand0568182017-09-12 17:39:15 +0300774 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300775 /
776
777[clinic start generated code]*/
778
779static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300780_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
781/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000782{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200783 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 ElementObject* element;
785 PyObject* tag;
786 PyObject* attrib;
787 PyObject* text;
788 PyObject* tail;
789 PyObject* id;
790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 tag = deepcopy(self->tag, memo);
792 if (!tag)
793 return NULL;
794
795 if (self->extra) {
796 attrib = deepcopy(self->extra->attrib, memo);
797 if (!attrib) {
798 Py_DECREF(tag);
799 return NULL;
800 }
801 } else {
802 Py_INCREF(Py_None);
803 attrib = Py_None;
804 }
805
Eli Bendersky092af1f2012-03-04 07:14:03 +0200806 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 Py_DECREF(tag);
809 Py_DECREF(attrib);
810
811 if (!element)
812 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814 text = deepcopy(JOIN_OBJ(self->text), memo);
815 if (!text)
816 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300817 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000818
819 tail = deepcopy(JOIN_OBJ(self->tail), memo);
820 if (!tail)
821 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300822 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300824 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000825 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826 if (element_resize(element, self->extra->length) < 0)
827 goto error;
828
829 for (i = 0; i < self->extra->length; i++) {
830 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300831 if (!child || !Element_Check(child)) {
832 if (child) {
833 raise_type_error(child);
834 Py_DECREF(child);
835 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000836 element->extra->length = i;
837 goto error;
838 }
839 element->extra->children[i] = child;
840 }
841
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300842 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000843 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000844 }
845
846 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700847 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000848 if (!id)
849 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000850
851 i = PyDict_SetItem(memo, id, (PyObject*) element);
852
853 Py_DECREF(id);
854
855 if (i < 0)
856 goto error;
857
858 return (PyObject*) element;
859
860 error:
861 Py_DECREF(element);
862 return NULL;
863}
864
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865LOCAL(PyObject *)
866deepcopy(PyObject *object, PyObject *memo)
867{
868 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200870 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200871
872 /* Fast paths */
873 if (object == Py_None || PyUnicode_CheckExact(object)) {
874 Py_INCREF(object);
875 return object;
876 }
877
878 if (Py_REFCNT(object) == 1) {
879 if (PyDict_CheckExact(object)) {
880 PyObject *key, *value;
881 Py_ssize_t pos = 0;
882 int simple = 1;
883 while (PyDict_Next(object, &pos, &key, &value)) {
884 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
885 simple = 0;
886 break;
887 }
888 }
889 if (simple)
890 return PyDict_Copy(object);
891 /* Fall through to general case */
892 }
893 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300894 return _elementtree_Element___deepcopy___impl(
895 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200896 }
897 }
898
899 /* General case */
900 st = ET_STATE_GLOBAL;
901 if (!st->deepcopy_obj) {
902 PyErr_SetString(PyExc_RuntimeError,
903 "deepcopy helper not found");
904 return NULL;
905 }
906
Victor Stinner7fbac452016-08-20 01:34:44 +0200907 stack[0] = object;
908 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200909 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200910}
911
912
Serhiy Storchakacb985562015-05-04 15:32:48 +0300913/*[clinic input]
914_elementtree.Element.__sizeof__ -> Py_ssize_t
915
916[clinic start generated code]*/
917
918static Py_ssize_t
919_elementtree_Element___sizeof___impl(ElementObject *self)
920/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200921{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200922 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200923 if (self->extra) {
924 result += sizeof(ElementObjectExtra);
925 if (self->extra->children != self->extra->_children)
926 result += sizeof(PyObject*) * self->extra->allocated;
927 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300928 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200929}
930
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931/* dict keys for getstate/setstate. */
932#define PICKLED_TAG "tag"
933#define PICKLED_CHILDREN "_children"
934#define PICKLED_ATTRIB "attrib"
935#define PICKLED_TAIL "tail"
936#define PICKLED_TEXT "text"
937
938/* __getstate__ returns a fabricated instance dict as in the pure-Python
939 * Element implementation, for interoperability/interchangeability. This
940 * makes the pure-Python implementation details an API, but (a) there aren't
941 * any unnecessary structures there; and (b) it buys compatibility with 3.2
942 * pickles. See issue #16076.
943 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300944/*[clinic input]
945_elementtree.Element.__getstate__
946
947[clinic start generated code]*/
948
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300950_elementtree_Element___getstate___impl(ElementObject *self)
951/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800952{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200953 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800954 PyObject *instancedict = NULL, *children;
955
956 /* Build a list of children. */
957 children = PyList_New(self->extra ? self->extra->length : 0);
958 if (!children)
959 return NULL;
960 for (i = 0; i < PyList_GET_SIZE(children); i++) {
961 PyObject *child = self->extra->children[i];
962 Py_INCREF(child);
963 PyList_SET_ITEM(children, i, child);
964 }
965
966 /* Construct the state object. */
967 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
968 if (noattrib)
969 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
970 PICKLED_TAG, self->tag,
971 PICKLED_CHILDREN, children,
972 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 PICKLED_TEXT, JOIN_OBJ(self->text),
974 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800975 else
976 instancedict = Py_BuildValue("{sOsOsOsOsO}",
977 PICKLED_TAG, self->tag,
978 PICKLED_CHILDREN, children,
979 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700980 PICKLED_TEXT, JOIN_OBJ(self->text),
981 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800982 if (instancedict) {
983 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986 else {
987 for (i = 0; i < PyList_GET_SIZE(children); i++)
988 Py_DECREF(PyList_GET_ITEM(children, i));
989 Py_DECREF(children);
990
991 return NULL;
992 }
993}
994
995static PyObject *
996element_setstate_from_attributes(ElementObject *self,
997 PyObject *tag,
998 PyObject *attrib,
999 PyObject *text,
1000 PyObject *tail,
1001 PyObject *children)
1002{
1003 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001004 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005
1006 if (!tag) {
1007 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
1008 return NULL;
1009 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001011 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001012 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
Oren Milman39ecb9c2017-10-10 23:26:24 +03001014 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1015 Py_INCREF(JOIN_OBJ(text));
1016 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017
Oren Milman39ecb9c2017-10-10 23:26:24 +03001018 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1019 Py_INCREF(JOIN_OBJ(tail));
1020 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021
1022 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001023 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001024 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001025 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026
1027 /* Compute 'nchildren'. */
1028 if (children) {
1029 if (!PyList_Check(children)) {
1030 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1031 return NULL;
1032 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001033 nchildren = PyList_GET_SIZE(children);
1034
1035 /* (Re-)allocate 'extra'.
1036 Avoid DECREFs calling into this code again (cycles, etc.)
1037 */
1038 oldextra = self->extra;
1039 self->extra = NULL;
1040 if (element_resize(self, nchildren)) {
1041 assert(!self->extra || !self->extra->length);
1042 clear_extra(self);
1043 self->extra = oldextra;
1044 return NULL;
1045 }
1046 assert(self->extra);
1047 assert(self->extra->allocated >= nchildren);
1048 if (oldextra) {
1049 assert(self->extra->attrib == Py_None);
1050 self->extra->attrib = oldextra->attrib;
1051 oldextra->attrib = Py_None;
1052 }
1053
1054 /* Copy children */
1055 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001056 PyObject *child = PyList_GET_ITEM(children, i);
1057 if (!Element_Check(child)) {
1058 raise_type_error(child);
1059 self->extra->length = i;
1060 dealloc_extra(oldextra);
1061 return NULL;
1062 }
1063 Py_INCREF(child);
1064 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001065 }
1066
1067 assert(!self->extra->length);
1068 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001069 }
1070 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001071 if (element_resize(self, 0)) {
1072 return NULL;
1073 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 }
1075
Eli Bendersky698bdb22013-01-10 06:01:06 -08001076 /* Stash attrib. */
1077 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001078 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001079 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001080 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001081 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082
1083 Py_RETURN_NONE;
1084}
1085
1086/* __setstate__ for Element instance from the Python implementation.
1087 * 'state' should be the instance dict.
1088 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001089
Eli Bendersky698bdb22013-01-10 06:01:06 -08001090static PyObject *
1091element_setstate_from_Python(ElementObject *self, PyObject *state)
1092{
1093 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1094 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1095 PyObject *args;
1096 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001097 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098
Eli Bendersky698bdb22013-01-10 06:01:06 -08001099 tag = attrib = text = tail = children = NULL;
1100 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001101 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001102 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001103
1104 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1105 &attrib, &text, &tail, &children))
1106 retval = element_setstate_from_attributes(self, tag, attrib, text,
1107 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001109 retval = NULL;
1110
1111 Py_DECREF(args);
1112 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001113}
1114
Serhiy Storchakacb985562015-05-04 15:32:48 +03001115/*[clinic input]
1116_elementtree.Element.__setstate__
1117
1118 state: object
1119 /
1120
1121[clinic start generated code]*/
1122
Eli Bendersky698bdb22013-01-10 06:01:06 -08001123static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001124_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1125/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001126{
1127 if (!PyDict_CheckExact(state)) {
1128 PyErr_Format(PyExc_TypeError,
1129 "Don't know how to unpickle \"%.200R\" as an Element",
1130 state);
1131 return NULL;
1132 }
1133 else
1134 return element_setstate_from_Python(self, state);
1135}
1136
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001137LOCAL(int)
1138checkpath(PyObject* tag)
1139{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 Py_ssize_t i;
1141 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001142
1143 /* check if a tag contains an xpath character */
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145#define PATHCHAR(ch) \
1146 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001148 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1150 void *data = PyUnicode_DATA(tag);
1151 unsigned int kind = PyUnicode_KIND(tag);
1152 for (i = 0; i < len; i++) {
1153 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1154 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001155 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001157 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001159 return 1;
1160 }
1161 return 0;
1162 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001163 if (PyBytes_Check(tag)) {
1164 char *p = PyBytes_AS_STRING(tag);
1165 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 if (p[i] == '{')
1167 check = 0;
1168 else if (p[i] == '}')
1169 check = 1;
1170 else if (check && PATHCHAR(p[i]))
1171 return 1;
1172 }
1173 return 0;
1174 }
1175
1176 return 1; /* unknown type; might be path expression */
1177}
1178
Serhiy Storchakacb985562015-05-04 15:32:48 +03001179/*[clinic input]
1180_elementtree.Element.extend
1181
1182 elements: object
1183 /
1184
1185[clinic start generated code]*/
1186
1187static PyObject *
1188_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1189/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001190{
1191 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001192 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193
Serhiy Storchakacb985562015-05-04 15:32:48 +03001194 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001195 if (!seq) {
1196 PyErr_Format(
1197 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001199 );
1200 return NULL;
1201 }
1202
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001203 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001205 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001206 if (element_add_subelement(self, element) < 0) {
1207 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001208 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001209 return NULL;
1210 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001211 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001212 }
1213
1214 Py_DECREF(seq);
1215
1216 Py_RETURN_NONE;
1217}
1218
Serhiy Storchakacb985562015-05-04 15:32:48 +03001219/*[clinic input]
1220_elementtree.Element.find
1221
1222 path: object
1223 namespaces: object = None
1224
1225[clinic start generated code]*/
1226
1227static PyObject *
1228_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1229 PyObject *namespaces)
1230/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001231{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001232 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001233 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001234
Serhiy Storchakacb985562015-05-04 15:32:48 +03001235 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001236 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001237 return _PyObject_CallMethodIdObjArgs(
1238 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001240 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001241
1242 if (!self->extra)
1243 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001244
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001245 for (i = 0; i < self->extra->length; i++) {
1246 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001248 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001250 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001253 Py_DECREF(item);
1254 if (rc < 0)
1255 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256 }
1257
1258 Py_RETURN_NONE;
1259}
1260
Serhiy Storchakacb985562015-05-04 15:32:48 +03001261/*[clinic input]
1262_elementtree.Element.findtext
1263
1264 path: object
1265 default: object = None
1266 namespaces: object = None
1267
1268[clinic start generated code]*/
1269
1270static PyObject *
1271_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1272 PyObject *default_value,
1273 PyObject *namespaces)
1274/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001276 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001277 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001278 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001279
Serhiy Storchakacb985562015-05-04 15:32:48 +03001280 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001281 return _PyObject_CallMethodIdObjArgs(
1282 st->elementpath_obj, &PyId_findtext,
1283 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284 );
1285
1286 if (!self->extra) {
1287 Py_INCREF(default_value);
1288 return default_value;
1289 }
1290
1291 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001292 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001293 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001294 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001296 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001298 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001299 if (text == Py_None) {
1300 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001301 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001303 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001304 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 return text;
1306 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001307 Py_DECREF(item);
1308 if (rc < 0)
1309 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001310 }
1311
1312 Py_INCREF(default_value);
1313 return default_value;
1314}
1315
Serhiy Storchakacb985562015-05-04 15:32:48 +03001316/*[clinic input]
1317_elementtree.Element.findall
1318
1319 path: object
1320 namespaces: object = None
1321
1322[clinic start generated code]*/
1323
1324static PyObject *
1325_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1326 PyObject *namespaces)
1327/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001328{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001329 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001330 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001331 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001332
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001333 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001334 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001335 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001336 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001337 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001338 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001339
1340 out = PyList_New(0);
1341 if (!out)
1342 return NULL;
1343
1344 if (!self->extra)
1345 return out;
1346
1347 for (i = 0; i < self->extra->length; i++) {
1348 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001349 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001350 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001351 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001352 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001353 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1354 Py_DECREF(item);
1355 Py_DECREF(out);
1356 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001357 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001358 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001359 }
1360
1361 return out;
1362}
1363
Serhiy Storchakacb985562015-05-04 15:32:48 +03001364/*[clinic input]
1365_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001366
Serhiy Storchakacb985562015-05-04 15:32:48 +03001367 path: object
1368 namespaces: object = None
1369
1370[clinic start generated code]*/
1371
1372static PyObject *
1373_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1374 PyObject *namespaces)
1375/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1376{
1377 PyObject* tag = path;
1378 _Py_IDENTIFIER(iterfind);
1379 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001380
Victor Stinnerf5616342016-12-09 15:26:00 +01001381 return _PyObject_CallMethodIdObjArgs(
1382 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001383}
1384
Serhiy Storchakacb985562015-05-04 15:32:48 +03001385/*[clinic input]
1386_elementtree.Element.get
1387
1388 key: object
1389 default: object = None
1390
1391[clinic start generated code]*/
1392
1393static PyObject *
1394_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1395 PyObject *default_value)
1396/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397{
1398 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399
1400 if (!self->extra || self->extra->attrib == Py_None)
1401 value = default_value;
1402 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001403 value = PyDict_GetItemWithError(self->extra->attrib, key);
1404 if (!value) {
1405 if (PyErr_Occurred()) {
1406 return NULL;
1407 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001409 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410 }
1411
1412 Py_INCREF(value);
1413 return value;
1414}
1415
Serhiy Storchakacb985562015-05-04 15:32:48 +03001416/*[clinic input]
1417_elementtree.Element.getchildren
1418
1419[clinic start generated code]*/
1420
1421static PyObject *
1422_elementtree_Element_getchildren_impl(ElementObject *self)
1423/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001425 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001426 PyObject* list;
1427
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001428 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1429 "This method will be removed in future versions. "
1430 "Use 'list(elem)' or iteration over elem instead.",
1431 1) < 0) {
1432 return NULL;
1433 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001434
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001435 if (!self->extra)
1436 return PyList_New(0);
1437
1438 list = PyList_New(self->extra->length);
1439 if (!list)
1440 return NULL;
1441
1442 for (i = 0; i < self->extra->length; i++) {
1443 PyObject* item = self->extra->children[i];
1444 Py_INCREF(item);
1445 PyList_SET_ITEM(list, i, item);
1446 }
1447
1448 return list;
1449}
1450
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001451
Eli Bendersky64d11e62012-06-15 07:42:50 +03001452static PyObject *
1453create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1454
1455
Serhiy Storchakacb985562015-05-04 15:32:48 +03001456/*[clinic input]
1457_elementtree.Element.iter
1458
1459 tag: object = None
1460
1461[clinic start generated code]*/
1462
Eli Bendersky64d11e62012-06-15 07:42:50 +03001463static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001464_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1465/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001466{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001467 if (PyUnicode_Check(tag)) {
1468 if (PyUnicode_READY(tag) < 0)
1469 return NULL;
1470 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1471 tag = Py_None;
1472 }
1473 else if (PyBytes_Check(tag)) {
1474 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1475 tag = Py_None;
1476 }
1477
Eli Bendersky64d11e62012-06-15 07:42:50 +03001478 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001479}
1480
1481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001483_elementtree.Element.getiterator
1484
1485 tag: object = None
1486
1487[clinic start generated code]*/
1488
1489static PyObject *
1490_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1491/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1492{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03001493 if (PyErr_WarnEx(PyExc_DeprecationWarning,
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001494 "This method will be removed in future versions. "
1495 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1496 1) < 0) {
1497 return NULL;
1498 }
1499 return _elementtree_Element_iter_impl(self, tag);
1500}
1501
1502
1503/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001504_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001505
Serhiy Storchakacb985562015-05-04 15:32:48 +03001506[clinic start generated code]*/
1507
1508static PyObject *
1509_elementtree_Element_itertext_impl(ElementObject *self)
1510/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1511{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001512 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513}
1514
Eli Bendersky64d11e62012-06-15 07:42:50 +03001515
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001517element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001519 ElementObject* self = (ElementObject*) self_;
1520
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521 if (!self->extra || index < 0 || index >= self->extra->length) {
1522 PyErr_SetString(
1523 PyExc_IndexError,
1524 "child index out of range"
1525 );
1526 return NULL;
1527 }
1528
1529 Py_INCREF(self->extra->children[index]);
1530 return self->extra->children[index];
1531}
1532
Serhiy Storchakacb985562015-05-04 15:32:48 +03001533/*[clinic input]
1534_elementtree.Element.insert
1535
1536 index: Py_ssize_t
1537 subelement: object(subclass_of='&Element_Type')
1538 /
1539
1540[clinic start generated code]*/
1541
1542static PyObject *
1543_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1544 PyObject *subelement)
1545/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001547 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548
Victor Stinner5f0af232013-07-11 23:01:36 +02001549 if (!self->extra) {
1550 if (create_extra(self, NULL) < 0)
1551 return NULL;
1552 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001553
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001554 if (index < 0) {
1555 index += self->extra->length;
1556 if (index < 0)
1557 index = 0;
1558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001559 if (index > self->extra->length)
1560 index = self->extra->length;
1561
1562 if (element_resize(self, 1) < 0)
1563 return NULL;
1564
1565 for (i = self->extra->length; i > index; i--)
1566 self->extra->children[i] = self->extra->children[i-1];
1567
Serhiy Storchakacb985562015-05-04 15:32:48 +03001568 Py_INCREF(subelement);
1569 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570
1571 self->extra->length++;
1572
1573 Py_RETURN_NONE;
1574}
1575
Serhiy Storchakacb985562015-05-04 15:32:48 +03001576/*[clinic input]
1577_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578
Serhiy Storchakacb985562015-05-04 15:32:48 +03001579[clinic start generated code]*/
1580
1581static PyObject *
1582_elementtree_Element_items_impl(ElementObject *self)
1583/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1584{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585 if (!self->extra || self->extra->attrib == Py_None)
1586 return PyList_New(0);
1587
1588 return PyDict_Items(self->extra->attrib);
1589}
1590
Serhiy Storchakacb985562015-05-04 15:32:48 +03001591/*[clinic input]
1592_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593
Serhiy Storchakacb985562015-05-04 15:32:48 +03001594[clinic start generated code]*/
1595
1596static PyObject *
1597_elementtree_Element_keys_impl(ElementObject *self)
1598/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1599{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600 if (!self->extra || self->extra->attrib == Py_None)
1601 return PyList_New(0);
1602
1603 return PyDict_Keys(self->extra->attrib);
1604}
1605
Martin v. Löwis18e16552006-02-15 17:27:45 +00001606static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607element_length(ElementObject* self)
1608{
1609 if (!self->extra)
1610 return 0;
1611
1612 return self->extra->length;
1613}
1614
Serhiy Storchakacb985562015-05-04 15:32:48 +03001615/*[clinic input]
1616_elementtree.Element.makeelement
1617
1618 tag: object
1619 attrib: object
1620 /
1621
1622[clinic start generated code]*/
1623
1624static PyObject *
1625_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1626 PyObject *attrib)
1627/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628{
1629 PyObject* elem;
1630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631 attrib = PyDict_Copy(attrib);
1632 if (!attrib)
1633 return NULL;
1634
Eli Bendersky092af1f2012-03-04 07:14:03 +02001635 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
1637 Py_DECREF(attrib);
1638
1639 return elem;
1640}
1641
Serhiy Storchakacb985562015-05-04 15:32:48 +03001642/*[clinic input]
1643_elementtree.Element.remove
1644
1645 subelement: object(subclass_of='&Element_Type')
1646 /
1647
1648[clinic start generated code]*/
1649
1650static PyObject *
1651_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1652/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001653{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001654 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001655 int rc;
1656 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658 if (!self->extra) {
1659 /* element has no children, so raise exception */
1660 PyErr_SetString(
1661 PyExc_ValueError,
1662 "list.remove(x): x not in list"
1663 );
1664 return NULL;
1665 }
1666
1667 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001668 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001670 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001671 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001673 if (rc < 0)
1674 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001675 }
1676
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001677 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001678 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001679 PyErr_SetString(
1680 PyExc_ValueError,
1681 "list.remove(x): x not in list"
1682 );
1683 return NULL;
1684 }
1685
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001686 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001687
1688 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001689 for (; i < self->extra->length; i++)
1690 self->extra->children[i] = self->extra->children[i+1];
1691
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001692 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693 Py_RETURN_NONE;
1694}
1695
1696static PyObject*
1697element_repr(ElementObject* self)
1698{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001699 int status;
1700
1701 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001702 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001703
1704 status = Py_ReprEnter((PyObject *)self);
1705 if (status == 0) {
1706 PyObject *res;
1707 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1708 Py_ReprLeave((PyObject *)self);
1709 return res;
1710 }
1711 if (status > 0)
1712 PyErr_Format(PyExc_RuntimeError,
1713 "reentrant call inside %s.__repr__",
1714 Py_TYPE(self)->tp_name);
1715 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001716}
1717
Serhiy Storchakacb985562015-05-04 15:32:48 +03001718/*[clinic input]
1719_elementtree.Element.set
1720
1721 key: object
1722 value: object
1723 /
1724
1725[clinic start generated code]*/
1726
1727static PyObject *
1728_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1729 PyObject *value)
1730/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001731{
1732 PyObject* attrib;
1733
Victor Stinner5f0af232013-07-11 23:01:36 +02001734 if (!self->extra) {
1735 if (create_extra(self, NULL) < 0)
1736 return NULL;
1737 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001738
1739 attrib = element_get_attrib(self);
1740 if (!attrib)
1741 return NULL;
1742
1743 if (PyDict_SetItem(attrib, key, value) < 0)
1744 return NULL;
1745
1746 Py_RETURN_NONE;
1747}
1748
1749static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001750element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001751{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001752 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001753 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001754 PyObject* old;
1755
1756 if (!self->extra || index < 0 || index >= self->extra->length) {
1757 PyErr_SetString(
1758 PyExc_IndexError,
1759 "child assignment index out of range");
1760 return -1;
1761 }
1762
1763 old = self->extra->children[index];
1764
1765 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001766 if (!Element_Check(item)) {
1767 raise_type_error(item);
1768 return -1;
1769 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001770 Py_INCREF(item);
1771 self->extra->children[index] = item;
1772 } else {
1773 self->extra->length--;
1774 for (i = index; i < self->extra->length; i++)
1775 self->extra->children[i] = self->extra->children[i+1];
1776 }
1777
1778 Py_DECREF(old);
1779
1780 return 0;
1781}
1782
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001783static PyObject*
1784element_subscr(PyObject* self_, PyObject* item)
1785{
1786 ElementObject* self = (ElementObject*) self_;
1787
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001788 if (PyIndex_Check(item)) {
1789 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001790
1791 if (i == -1 && PyErr_Occurred()) {
1792 return NULL;
1793 }
1794 if (i < 0 && self->extra)
1795 i += self->extra->length;
1796 return element_getitem(self_, i);
1797 }
1798 else if (PySlice_Check(item)) {
1799 Py_ssize_t start, stop, step, slicelen, cur, i;
1800 PyObject* list;
1801
1802 if (!self->extra)
1803 return PyList_New(0);
1804
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001805 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001806 return NULL;
1807 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001808 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1809 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001810
1811 if (slicelen <= 0)
1812 return PyList_New(0);
1813 else {
1814 list = PyList_New(slicelen);
1815 if (!list)
1816 return NULL;
1817
1818 for (cur = start, i = 0; i < slicelen;
1819 cur += step, i++) {
1820 PyObject* item = self->extra->children[cur];
1821 Py_INCREF(item);
1822 PyList_SET_ITEM(list, i, item);
1823 }
1824
1825 return list;
1826 }
1827 }
1828 else {
1829 PyErr_SetString(PyExc_TypeError,
1830 "element indices must be integers");
1831 return NULL;
1832 }
1833}
1834
1835static int
1836element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1837{
1838 ElementObject* self = (ElementObject*) self_;
1839
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001840 if (PyIndex_Check(item)) {
1841 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001842
1843 if (i == -1 && PyErr_Occurred()) {
1844 return -1;
1845 }
1846 if (i < 0 && self->extra)
1847 i += self->extra->length;
1848 return element_setitem(self_, i, value);
1849 }
1850 else if (PySlice_Check(item)) {
1851 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1852
1853 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001854 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855
Victor Stinner5f0af232013-07-11 23:01:36 +02001856 if (!self->extra) {
1857 if (create_extra(self, NULL) < 0)
1858 return -1;
1859 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001860
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001861 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001862 return -1;
1863 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001864 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1865 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001866
Eli Bendersky865756a2012-03-09 13:38:15 +02001867 if (value == NULL) {
1868 /* Delete slice */
1869 size_t cur;
1870 Py_ssize_t i;
1871
1872 if (slicelen <= 0)
1873 return 0;
1874
1875 /* Since we're deleting, the direction of the range doesn't matter,
1876 * so for simplicity make it always ascending.
1877 */
1878 if (step < 0) {
1879 stop = start + 1;
1880 start = stop + step * (slicelen - 1) - 1;
1881 step = -step;
1882 }
1883
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001884 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001885
1886 /* recycle is a list that will contain all the children
1887 * scheduled for removal.
1888 */
1889 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001890 return -1;
1891 }
1892
1893 /* This loop walks over all the children that have to be deleted,
1894 * with cur pointing at them. num_moved is the amount of children
1895 * until the next deleted child that have to be "shifted down" to
1896 * occupy the deleted's places.
1897 * Note that in the ith iteration, shifting is done i+i places down
1898 * because i children were already removed.
1899 */
1900 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1901 /* Compute how many children have to be moved, clipping at the
1902 * list end.
1903 */
1904 Py_ssize_t num_moved = step - 1;
1905 if (cur + step >= (size_t)self->extra->length) {
1906 num_moved = self->extra->length - cur - 1;
1907 }
1908
1909 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1910
1911 memmove(
1912 self->extra->children + cur - i,
1913 self->extra->children + cur + 1,
1914 num_moved * sizeof(PyObject *));
1915 }
1916
1917 /* Leftover "tail" after the last removed child */
1918 cur = start + (size_t)slicelen * step;
1919 if (cur < (size_t)self->extra->length) {
1920 memmove(
1921 self->extra->children + cur - slicelen,
1922 self->extra->children + cur,
1923 (self->extra->length - cur) * sizeof(PyObject *));
1924 }
1925
1926 self->extra->length -= slicelen;
1927
1928 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001929 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001930 return 0;
1931 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001932
1933 /* A new slice is actually being assigned */
1934 seq = PySequence_Fast(value, "");
1935 if (!seq) {
1936 PyErr_Format(
1937 PyExc_TypeError,
1938 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1939 );
1940 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001941 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001942 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001943
1944 if (step != 1 && newlen != slicelen)
1945 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001946 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001947 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001948 "attempt to assign sequence of size %zd "
1949 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001950 newlen, slicelen
1951 );
1952 return -1;
1953 }
1954
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001955 /* Resize before creating the recycle bin, to prevent refleaks. */
1956 if (newlen > slicelen) {
1957 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001958 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001959 return -1;
1960 }
1961 }
1962
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001963 for (i = 0; i < newlen; i++) {
1964 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1965 if (!Element_Check(element)) {
1966 raise_type_error(element);
1967 Py_DECREF(seq);
1968 return -1;
1969 }
1970 }
1971
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001972 if (slicelen > 0) {
1973 /* to avoid recursive calls to this method (via decref), move
1974 old items to the recycle bin here, and get rid of them when
1975 we're done modifying the element */
1976 recycle = PyList_New(slicelen);
1977 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001978 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001979 return -1;
1980 }
1981 for (cur = start, i = 0; i < slicelen;
1982 cur += step, i++)
1983 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1984 }
1985
1986 if (newlen < slicelen) {
1987 /* delete slice */
1988 for (i = stop; i < self->extra->length; i++)
1989 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1990 } else if (newlen > slicelen) {
1991 /* insert slice */
1992 for (i = self->extra->length-1; i >= stop; i--)
1993 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1994 }
1995
1996 /* replace the slice */
1997 for (cur = start, i = 0; i < newlen;
1998 cur += step, i++) {
1999 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
2000 Py_INCREF(element);
2001 self->extra->children[cur] = element;
2002 }
2003
2004 self->extra->length += newlen - slicelen;
2005
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02002006 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002007
2008 /* discard the recycle bin, and everything in it */
2009 Py_XDECREF(recycle);
2010
2011 return 0;
2012 }
2013 else {
2014 PyErr_SetString(PyExc_TypeError,
2015 "element indices must be integers");
2016 return -1;
2017 }
2018}
2019
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002020static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02002021element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002022{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002023 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002024 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002025 return res;
2026}
2027
Serhiy Storchakadde08152015-11-25 15:28:13 +02002028static PyObject*
2029element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002030{
Serhiy Storchakadde08152015-11-25 15:28:13 +02002031 PyObject *res = element_get_text(self);
2032 Py_XINCREF(res);
2033 return res;
2034}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002035
Serhiy Storchakadde08152015-11-25 15:28:13 +02002036static PyObject*
2037element_tail_getter(ElementObject *self, void *closure)
2038{
2039 PyObject *res = element_get_tail(self);
2040 Py_XINCREF(res);
2041 return res;
2042}
2043
2044static PyObject*
2045element_attrib_getter(ElementObject *self, void *closure)
2046{
2047 PyObject *res;
2048 if (!self->extra) {
2049 if (create_extra(self, NULL) < 0)
2050 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002051 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002052 res = element_get_attrib(self);
2053 Py_XINCREF(res);
2054 return res;
2055}
Victor Stinner4d463432013-07-11 23:05:03 +02002056
/* Macro for setter validation: a setter called with V == NULL is being
   asked to delete the attribute, which is refused with AttributeError and
   makes the enclosing function return -1.  The body is wrapped in
   do { } while (0) so the macro expands to exactly one statement and is
   safe inside an unbraced if/else; invoke it with a trailing semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2065
Serhiy Storchakadde08152015-11-25 15:28:13 +02002066static int
2067element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2068{
2069 _VALIDATE_ATTR_VALUE(value);
2070 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002071 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002072 return 0;
2073}
2074
2075static int
2076element_text_setter(ElementObject *self, PyObject *value, void *closure)
2077{
2078 _VALIDATE_ATTR_VALUE(value);
2079 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002080 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002081 return 0;
2082}
2083
2084static int
2085element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2086{
2087 _VALIDATE_ATTR_VALUE(value);
2088 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002089 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002090 return 0;
2091}
2092
2093static int
2094element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2095{
2096 _VALIDATE_ATTR_VALUE(value);
2097 if (!self->extra) {
2098 if (create_extra(self, NULL) < 0)
2099 return -1;
2100 }
2101 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002102 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002103 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002104}
2105
/* Sequence protocol table for elements: length, integer item access and
   integer item assignment/deletion.  Entries are positional and follow
   the field order of PySequenceMethods. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,           /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                    /* sq_item */
    0,                                  /* was_sq_slice (unused) */
    element_setitem,                    /* sq_ass_item */
    0,                                  /* was_sq_ass_slice (unused) */
};
2115
Eli Bendersky64d11e62012-06-15 07:42:50 +03002116/******************************* Element iterator ****************************/
2117
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* strong reference to the saved parent */
    Py_ssize_t child_index;     /* index of the next child to visit */
} ParentLocator;
2130
/* State of one iter()/itertext() iteration. */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of saved parents (PyMem-allocated) */
    Py_ssize_t parent_stack_used;   /* number of entries currently on the stack */
    Py_ssize_t parent_stack_size;   /* number of entries the array can hold */
    ElementObject *root_element;    /* element to start from; set to NULL once
                                       iteration has consumed it */
    PyObject *sought_tag;           /* tag to match; Py_None matches every element */
    int gettext;                    /* non-zero: yield text/tail strings instead
                                       of elements */
} ElementIterObject;
2140
2141
2142static void
2143elementiter_dealloc(ElementIterObject *it)
2144{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002145 Py_ssize_t i = it->parent_stack_used;
2146 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002147 /* bpo-31095: UnTrack is needed before calling any callbacks */
2148 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002149 while (i--)
2150 Py_XDECREF(it->parent_stack[i].parent);
2151 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152
2153 Py_XDECREF(it->sought_tag);
2154 Py_XDECREF(it->root_element);
2155
Eli Bendersky64d11e62012-06-15 07:42:50 +03002156 PyObject_GC_Del(it);
2157}
2158
2159static int
2160elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2161{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002162 Py_ssize_t i = it->parent_stack_used;
2163 while (i--)
2164 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002165
2166 Py_VISIT(it->root_element);
2167 Py_VISIT(it->sought_tag);
2168 return 0;
2169}
2170
2171/* Helper function for elementiter_next. Add a new parent to the parent stack.
2172 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002173static int
2174parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002175{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 ParentLocator *item;
2177
2178 if (it->parent_stack_used >= it->parent_stack_size) {
2179 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2180 ParentLocator *parent_stack = it->parent_stack;
2181 PyMem_Resize(parent_stack, ParentLocator, new_size);
2182 if (parent_stack == NULL)
2183 return -1;
2184 it->parent_stack = parent_stack;
2185 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002187 item = it->parent_stack + it->parent_stack_used++;
2188 Py_INCREF(parent);
2189 item->parent = parent;
2190 item->child_index = 0;
2191 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002192}
2193
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * Returns a new reference to the next element (or, in gettext mode,
     * the next non-empty text/tail object), or NULL with an exception set:
     * StopIteration when exhausted, otherwise the error that occurred.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     *
     * Reference bookkeeping: on every path that reaches the code after the
     * if/else below, or the gettext label, `elem` carries exactly one
     * reference owned by this function, which is either returned to the
     * caller or dropped.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: the reference the stack held on elem is now ours. */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            /* own reference to the child, independent of the parent's array */
            Py_INCREF(elem);
        }

        /* Descend into elem: the stack takes its own, second reference. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* iter() mode: yield elem if it matches the sought tag */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* itertext() mode: `text` is a borrowed text/tail value of elem,
         * or NULL if fetching it failed.
         */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Take our own reference before releasing elem, which may be
             * what keeps text alive; only truthy text is yielded.
             */
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2299
2300
/* Type object for the element iterator: a GC-aware type that is its own
   iterator (tp_iter returns self) and produces values through
   elementiter_next.  No tp_new is provided here; instances are built by
   create_elementiter(). */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2344
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002345#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002346
2347static PyObject *
2348create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2349{
2350 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002351
2352 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2353 if (!it)
2354 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002355
Victor Stinner4d463432013-07-11 23:05:03 +02002356 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002357 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002358 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002359 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002360 it->root_element = self;
2361
Eli Bendersky64d11e62012-06-15 07:42:50 +03002362 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002363
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002364 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002365 if (it->parent_stack == NULL) {
2366 Py_DECREF(it);
2367 PyErr_NoMemory();
2368 return NULL;
2369 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002370 it->parent_stack_used = 0;
2371 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002372
Eli Bendersky64d11e62012-06-15 07:42:50 +03002373 return (PyObject *)it;
2374}
2375
2376
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377/* ==================================================================== */
2378/* the tree builder type */
2379
/* Instance layout of the tree builder.  treebuilder_new() starts with
   this/last set to None, a preallocated stack list, and every other
   pointer NULL. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* optional factory callables; NULL until set */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* boolean flags, 0 by default; presumably they control whether
       comments / processing instructions are inserted into the tree -
       confirm against the code that reads them */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True only for exact TreeBuilder instances, not for subclasses. */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002411
2412/* -------------------------------------------------------------------- */
2413/* constructor and destructor */
2414
Eli Bendersky58d548d2012-05-29 15:45:16 +03002415static PyObject *
2416treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002418 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2419 if (t != NULL) {
2420 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421
Eli Bendersky58d548d2012-05-29 15:45:16 +03002422 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002423 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002424 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002425 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002426
Eli Bendersky58d548d2012-05-29 15:45:16 +03002427 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002428 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002429 t->comment_factory = NULL;
2430 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002431 t->stack = PyList_New(20);
2432 if (!t->stack) {
2433 Py_DECREF(t->this);
2434 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002435 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002436 return NULL;
2437 }
2438 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002439
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002440 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002441 t->start_event_obj = t->end_event_obj = NULL;
2442 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002443 t->comment_event_obj = t->pi_event_obj = NULL;
2444 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002445 }
2446 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002447}
2448
Serhiy Storchakacb985562015-05-04 15:32:48 +03002449/*[clinic input]
2450_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002451
Serhiy Storchakacb985562015-05-04 15:32:48 +03002452 element_factory: object = NULL
Stefan Behnel43851a22019-05-01 21:20:38 +02002453 *
2454 comment_factory: object = NULL
2455 pi_factory: object = NULL
2456 insert_comments: bool = False
2457 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002458
2459[clinic start generated code]*/
2460
2461static int
2462_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002463 PyObject *element_factory,
2464 PyObject *comment_factory,
2465 PyObject *pi_factory,
2466 int insert_comments, int insert_pis)
2467/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002468{
Stefan Behnel43851a22019-05-01 21:20:38 +02002469 if (element_factory && element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002470 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002471 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002472 } else {
2473 Py_CLEAR(self->element_factory);
2474 }
2475
2476 if (!comment_factory || comment_factory == Py_None) {
2477 elementtreestate *st = ET_STATE_GLOBAL;
2478 comment_factory = st->comment_factory;
2479 }
2480 if (comment_factory) {
2481 Py_INCREF(comment_factory);
2482 Py_XSETREF(self->comment_factory, comment_factory);
2483 self->insert_comments = insert_comments;
2484 } else {
2485 Py_CLEAR(self->comment_factory);
2486 self->insert_comments = 0;
2487 }
2488
2489 if (!pi_factory || pi_factory == Py_None) {
2490 elementtreestate *st = ET_STATE_GLOBAL;
2491 pi_factory = st->pi_factory;
2492 }
2493 if (pi_factory) {
2494 Py_INCREF(pi_factory);
2495 Py_XSETREF(self->pi_factory, pi_factory);
2496 self->insert_pis = insert_pis;
2497 } else {
2498 Py_CLEAR(self->pi_factory);
2499 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002500 }
2501
Eli Bendersky58d548d2012-05-29 15:45:16 +03002502 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002503}
2504
Eli Bendersky48d358b2012-05-30 17:57:50 +03002505static int
2506treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2507{
Stefan Behnel43851a22019-05-01 21:20:38 +02002508 Py_VISIT(self->pi_event_obj);
2509 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002510 Py_VISIT(self->end_ns_event_obj);
2511 Py_VISIT(self->start_ns_event_obj);
2512 Py_VISIT(self->end_event_obj);
2513 Py_VISIT(self->start_event_obj);
2514 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002515 Py_VISIT(self->root);
2516 Py_VISIT(self->this);
2517 Py_VISIT(self->last);
2518 Py_VISIT(self->data);
2519 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002520 Py_VISIT(self->pi_factory);
2521 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002522 Py_VISIT(self->element_factory);
2523 return 0;
2524}
2525
2526static int
2527treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528{
Stefan Behnel43851a22019-05-01 21:20:38 +02002529 Py_CLEAR(self->pi_event_obj);
2530 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002531 Py_CLEAR(self->end_ns_event_obj);
2532 Py_CLEAR(self->start_ns_event_obj);
2533 Py_CLEAR(self->end_event_obj);
2534 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002535 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002536 Py_CLEAR(self->stack);
2537 Py_CLEAR(self->data);
2538 Py_CLEAR(self->last);
2539 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002540 Py_CLEAR(self->pi_factory);
2541 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002542 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002543 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002544 return 0;
2545}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546
Eli Bendersky48d358b2012-05-30 17:57:50 +03002547static void
2548treebuilder_dealloc(TreeBuilderObject *self)
2549{
2550 PyObject_GC_UnTrack(self);
2551 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002552 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553}
2554
2555/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002556/* helpers for handling of arbitrary element-like objects */
2557
Stefan Behnel43851a22019-05-01 21:20:38 +02002558/*[clinic input]
2559_elementtree._set_factories
2560
2561 comment_factory: object
2562 pi_factory: object
2563 /
2564
2565Change the factories used to create comments and processing instructions.
2566
2567For internal use only.
2568[clinic start generated code]*/
2569
2570static PyObject *
2571_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2572 PyObject *pi_factory)
2573/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2574{
2575 elementtreestate *st = ET_STATE_GLOBAL;
2576 PyObject *old;
2577
2578 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2579 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2580 Py_TYPE(comment_factory)->tp_name);
2581 return NULL;
2582 }
2583 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2584 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2585 Py_TYPE(pi_factory)->tp_name);
2586 return NULL;
2587 }
2588
2589 old = PyTuple_Pack(2,
2590 st->comment_factory ? st->comment_factory : Py_None,
2591 st->pi_factory ? st->pi_factory : Py_None);
2592
2593 if (comment_factory == Py_None) {
2594 Py_CLEAR(st->comment_factory);
2595 } else {
2596 Py_INCREF(comment_factory);
2597 Py_XSETREF(st->comment_factory, comment_factory);
2598 }
2599 if (pi_factory == Py_None) {
2600 Py_CLEAR(st->pi_factory);
2601 } else {
2602 Py_INCREF(pi_factory);
2603 Py_XSETREF(st->pi_factory, pi_factory);
2604 }
2605
2606 return old;
2607}
2608
Antoine Pitrouee329312012-10-04 19:53:29 +02002609static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002610treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002611 PyObject **dest, _Py_Identifier *name)
2612{
2613 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002614 PyObject *tmp = JOIN_OBJ(*dest);
2615 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2616 *data = NULL;
2617 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002618 return 0;
2619 }
2620 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002621 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002622 int r;
2623 if (joined == NULL)
2624 return -1;
2625 r = _PyObject_SetAttrId(element, name, joined);
2626 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002627 if (r < 0)
2628 return -1;
2629 Py_CLEAR(*data);
2630 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002631 }
2632}
2633
Serhiy Storchaka576def02017-03-30 09:47:31 +03002634LOCAL(int)
2635treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002636{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002637 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002638
Serhiy Storchaka576def02017-03-30 09:47:31 +03002639 if (!self->data) {
2640 return 0;
2641 }
2642
2643 if (self->this == element) {
2644 _Py_IDENTIFIER(text);
2645 return treebuilder_set_element_text_or_tail(
2646 element, &self->data,
2647 &((ElementObject *) element)->text, &PyId_text);
2648 }
2649 else {
2650 _Py_IDENTIFIER(tail);
2651 return treebuilder_set_element_text_or_tail(
2652 element, &self->data,
2653 &((ElementObject *) element)->tail, &PyId_tail);
2654 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002655}
2656
2657static int
2658treebuilder_add_subelement(PyObject *element, PyObject *child)
2659{
2660 _Py_IDENTIFIER(append);
2661 if (Element_CheckExact(element)) {
2662 ElementObject *elem = (ElementObject *) element;
2663 return element_add_subelement(elem, child);
2664 }
2665 else {
2666 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002667 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002668 if (res == NULL)
2669 return -1;
2670 Py_DECREF(res);
2671 return 0;
2672 }
2673}
2674
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002675LOCAL(int)
2676treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2677 PyObject *node)
2678{
2679 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002680 PyObject *res;
2681 PyObject *event = PyTuple_Pack(2, action, node);
2682 if (event == NULL)
2683 return -1;
Stefan Behnel43851a22019-05-01 21:20:38 +02002684 res = _PyObject_FastCall(self->events_append, &event, 1);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002685 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002686 if (res == NULL)
2687 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002688 Py_DECREF(res);
2689 }
2690 return 0;
2691}
2692
Antoine Pitrouee329312012-10-04 19:53:29 +02002693/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694/* handlers */
2695
2696LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2698 PyObject* attrib)
2699{
2700 PyObject* node;
2701 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002702 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703
Serhiy Storchaka576def02017-03-30 09:47:31 +03002704 if (treebuilder_flush_data(self) < 0) {
2705 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706 }
2707
Stefan Behnel43851a22019-05-01 21:20:38 +02002708 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002709 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002710 } else if (attrib == Py_None) {
2711 attrib = PyDict_New();
2712 if (!attrib)
2713 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002714 node = PyObject_CallFunctionObjArgs(self->element_factory,
2715 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002716 Py_DECREF(attrib);
2717 }
2718 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002719 node = PyObject_CallFunctionObjArgs(self->element_factory,
2720 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002721 }
2722 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002724 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725
Antoine Pitrouee329312012-10-04 19:53:29 +02002726 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727
2728 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002729 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002730 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731 } else {
2732 if (self->root) {
2733 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002734 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735 "multiple elements on top level"
2736 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002737 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738 }
2739 Py_INCREF(node);
2740 self->root = node;
2741 }
2742
2743 if (self->index < PyList_GET_SIZE(self->stack)) {
2744 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002745 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 Py_INCREF(this);
2747 } else {
2748 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002749 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750 }
2751 self->index++;
2752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002754 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002756 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002758 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2759 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760
2761 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002762
2763 error:
2764 Py_DECREF(node);
2765 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766}
2767
2768LOCAL(PyObject*)
2769treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2770{
2771 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002772 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002773 /* ignore calls to data before the first call to start */
2774 Py_RETURN_NONE;
2775 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776 /* store the first item as is */
2777 Py_INCREF(data); self->data = data;
2778 } else {
2779 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002780 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2781 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002782 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 /* expat often generates single character data sections; handle
2784 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002785 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2786 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 } else if (PyList_CheckExact(self->data)) {
2790 if (PyList_Append(self->data, data) < 0)
2791 return NULL;
2792 } else {
2793 PyObject* list = PyList_New(2);
2794 if (!list)
2795 return NULL;
2796 PyList_SET_ITEM(list, 0, self->data);
2797 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2798 self->data = list;
2799 }
2800 }
2801
2802 Py_RETURN_NONE;
2803}
2804
2805LOCAL(PyObject*)
2806treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2807{
2808 PyObject* item;
2809
Serhiy Storchaka576def02017-03-30 09:47:31 +03002810 if (treebuilder_flush_data(self) < 0) {
2811 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812 }
2813
2814 if (self->index == 0) {
2815 PyErr_SetString(
2816 PyExc_IndexError,
2817 "pop from empty stack"
2818 );
2819 return NULL;
2820 }
2821
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002822 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002823 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002824 self->index--;
2825 self->this = PyList_GET_ITEM(self->stack, self->index);
2826 Py_INCREF(self->this);
2827 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002829 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2830 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831
2832 Py_INCREF(self->last);
2833 return (PyObject*) self->last;
2834}
2835
Stefan Behnel43851a22019-05-01 21:20:38 +02002836LOCAL(PyObject*)
2837treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2838{
2839 PyObject* comment = NULL;
2840 PyObject* this;
2841
2842 if (treebuilder_flush_data(self) < 0) {
2843 return NULL;
2844 }
2845
2846 if (self->comment_factory) {
2847 comment = _PyObject_FastCall(self->comment_factory, &text, 1);
2848 if (!comment)
2849 return NULL;
2850
2851 this = self->this;
2852 if (self->insert_comments && this != Py_None) {
2853 if (treebuilder_add_subelement(this, comment) < 0)
2854 goto error;
2855 }
2856 } else {
2857 Py_INCREF(text);
2858 comment = text;
2859 }
2860
2861 if (self->events_append && self->comment_event_obj) {
2862 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2863 goto error;
2864 }
2865
2866 return comment;
2867
2868 error:
2869 Py_DECREF(comment);
2870 return NULL;
2871}
2872
2873LOCAL(PyObject*)
2874treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2875{
2876 PyObject* pi = NULL;
2877 PyObject* this;
2878 PyObject* stack[2] = {target, text};
2879
2880 if (treebuilder_flush_data(self) < 0) {
2881 return NULL;
2882 }
2883
2884 if (self->pi_factory) {
2885 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2886 if (!pi) {
2887 return NULL;
2888 }
2889
2890 this = self->this;
2891 if (self->insert_pis && this != Py_None) {
2892 if (treebuilder_add_subelement(this, pi) < 0)
2893 goto error;
2894 }
2895 } else {
2896 pi = PyTuple_Pack(2, target, text);
2897 if (!pi) {
2898 return NULL;
2899 }
2900 }
2901
2902 if (self->events_append && self->pi_event_obj) {
2903 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2904 goto error;
2905 }
2906
2907 return pi;
2908
2909 error:
2910 Py_DECREF(pi);
2911 return NULL;
2912}
2913
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914/* -------------------------------------------------------------------- */
2915/* methods (in alphabetical order) */
2916
Serhiy Storchakacb985562015-05-04 15:32:48 +03002917/*[clinic input]
2918_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919
Serhiy Storchakacb985562015-05-04 15:32:48 +03002920 data: object
2921 /
2922
2923[clinic start generated code]*/
2924
2925static PyObject *
2926_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2927/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2928{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929 return treebuilder_handle_data(self, data);
2930}
2931
Serhiy Storchakacb985562015-05-04 15:32:48 +03002932/*[clinic input]
2933_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934
Serhiy Storchakacb985562015-05-04 15:32:48 +03002935 tag: object
2936 /
2937
2938[clinic start generated code]*/
2939
2940static PyObject *
2941_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2942/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2943{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 return treebuilder_handle_end(self, tag);
2945}
2946
Stefan Behnel43851a22019-05-01 21:20:38 +02002947/*[clinic input]
2948_elementtree.TreeBuilder.comment
2949
2950 text: object
2951 /
2952
2953[clinic start generated code]*/
2954
2955static PyObject *
2956_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2957/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2958{
2959 return treebuilder_handle_comment(self, text);
2960}
2961
2962/*[clinic input]
2963_elementtree.TreeBuilder.pi
2964
2965 target: object
2966 text: object = None
2967 /
2968
2969[clinic start generated code]*/
2970
2971static PyObject *
2972_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
2973 PyObject *text)
2974/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
2975{
2976 return treebuilder_handle_pi(self, target, text);
2977}
2978
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979LOCAL(PyObject*)
2980treebuilder_done(TreeBuilderObject* self)
2981{
2982 PyObject* res;
2983
2984 /* FIXME: check stack size? */
2985
2986 if (self->root)
2987 res = self->root;
2988 else
2989 res = Py_None;
2990
2991 Py_INCREF(res);
2992 return res;
2993}
2994
Serhiy Storchakacb985562015-05-04 15:32:48 +03002995/*[clinic input]
2996_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997
Serhiy Storchakacb985562015-05-04 15:32:48 +03002998[clinic start generated code]*/
2999
3000static PyObject *
3001_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3002/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3003{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 return treebuilder_done(self);
3005}
3006
Serhiy Storchakacb985562015-05-04 15:32:48 +03003007/*[clinic input]
3008_elementtree.TreeBuilder.start
3009
3010 tag: object
3011 attrs: object = None
3012 /
3013
3014[clinic start generated code]*/
3015
3016static PyObject *
3017_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3018 PyObject *attrs)
3019/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003020{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003021 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022}
3023
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003024/* ==================================================================== */
3025/* the expat interface */
3026
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003028#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003029
3030/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3031 * cached globally without being in per-module state.
3032 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003033static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035
Eli Bendersky52467b12012-06-01 07:13:08 +03003036static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3037 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3038
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039typedef struct {
3040 PyObject_HEAD
3041
3042 XML_Parser parser;
3043
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003044 PyObject *target;
3045 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003046
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003047 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003049 PyObject *handle_start;
3050 PyObject *handle_data;
3051 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003053 PyObject *handle_comment;
3054 PyObject *handle_pi;
3055 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003057 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003058
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003059} XMLParserObject;
3060
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003061/* helpers */
3062
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063LOCAL(PyObject*)
3064makeuniversal(XMLParserObject* self, const char* string)
3065{
3066 /* convert a UTF-8 tag/attribute name from the expat parser
3067 to a universal name string */
3068
Antoine Pitrouc1948842012-10-01 23:40:37 +02003069 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 PyObject* key;
3071 PyObject* value;
3072
3073 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003074 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 if (!key)
3076 return NULL;
3077
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003078 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079
3080 if (value) {
3081 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003082 }
3083 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084 /* new name. convert to universal name, and decode as
3085 necessary */
3086
3087 PyObject* tag;
3088 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003089 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090
3091 /* look for namespace separator */
3092 for (i = 0; i < size; i++)
3093 if (string[i] == '}')
3094 break;
3095 if (i != size) {
3096 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003097 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003098 if (tag == NULL) {
3099 Py_DECREF(key);
3100 return NULL;
3101 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003102 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103 p[0] = '{';
3104 memcpy(p+1, string, size);
3105 size++;
3106 } else {
3107 /* plain name; use key as tag */
3108 Py_INCREF(key);
3109 tag = key;
3110 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003111
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003112 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003113 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003114 value = PyUnicode_DecodeUTF8(p, size, "strict");
3115 Py_DECREF(tag);
3116 if (!value) {
3117 Py_DECREF(key);
3118 return NULL;
3119 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003120
3121 /* add to names dictionary */
3122 if (PyDict_SetItem(self->names, key, value) < 0) {
3123 Py_DECREF(key);
3124 Py_DECREF(value);
3125 return NULL;
3126 }
3127 }
3128
3129 Py_DECREF(key);
3130 return value;
3131}
3132
Eli Bendersky5b77d812012-03-16 08:20:05 +02003133/* Set the ParseError exception with the given parameters.
3134 * If message is not NULL, it's used as the error string. Otherwise, the
3135 * message string is the default for the given error_code.
3136*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003137static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003138expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3139 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003140{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003141 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003142 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003143
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003144 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003145 message ? message : EXPAT(ErrorString)(error_code),
3146 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003147 if (errmsg == NULL)
3148 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003149
Stefan Behnel43851a22019-05-01 21:20:38 +02003150 error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003151 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003152 if (!error)
3153 return;
3154
Eli Bendersky5b77d812012-03-16 08:20:05 +02003155 /* Add code and position attributes */
3156 code = PyLong_FromLong((long)error_code);
3157 if (!code) {
3158 Py_DECREF(error);
3159 return;
3160 }
3161 if (PyObject_SetAttrString(error, "code", code) == -1) {
3162 Py_DECREF(error);
3163 Py_DECREF(code);
3164 return;
3165 }
3166 Py_DECREF(code);
3167
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003168 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003169 if (!position) {
3170 Py_DECREF(error);
3171 return;
3172 }
3173 if (PyObject_SetAttrString(error, "position", position) == -1) {
3174 Py_DECREF(error);
3175 Py_DECREF(position);
3176 return;
3177 }
3178 Py_DECREF(position);
3179
Eli Bendersky532d03e2013-08-10 08:00:39 -07003180 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003181 Py_DECREF(error);
3182}
3183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184/* -------------------------------------------------------------------- */
3185/* handlers */
3186
3187static void
3188expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3189 int data_len)
3190{
3191 PyObject* key;
3192 PyObject* value;
3193 PyObject* res;
3194
3195 if (data_len < 2 || data_in[0] != '&')
3196 return;
3197
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003198 if (PyErr_Occurred())
3199 return;
3200
Neal Norwitz0269b912007-08-08 06:56:02 +00003201 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202 if (!key)
3203 return;
3204
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003205 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003206
3207 if (value) {
3208 if (TreeBuilder_CheckExact(self->target))
3209 res = treebuilder_handle_data(
3210 (TreeBuilderObject*) self->target, value
3211 );
3212 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003213 res = _PyObject_FastCall(self->handle_data, &value, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214 else
3215 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003216 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003217 } else if (!PyErr_Occurred()) {
3218 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003219 char message[128] = "undefined entity ";
3220 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003221 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003222 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003224 EXPAT(GetErrorColumnNumber)(self->parser),
3225 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 );
3227 }
3228
3229 Py_DECREF(key);
3230}
3231
3232static void
3233expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3234 const XML_Char **attrib_in)
3235{
3236 PyObject* res;
3237 PyObject* tag;
3238 PyObject* attrib;
3239 int ok;
3240
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003241 if (PyErr_Occurred())
3242 return;
3243
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 /* tag name */
3245 tag = makeuniversal(self, tag_in);
3246 if (!tag)
3247 return; /* parser will look for errors */
3248
3249 /* attributes */
3250 if (attrib_in[0]) {
3251 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003252 if (!attrib) {
3253 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003255 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256 while (attrib_in[0] && attrib_in[1]) {
3257 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003258 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 if (!key || !value) {
3260 Py_XDECREF(value);
3261 Py_XDECREF(key);
3262 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003263 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264 return;
3265 }
3266 ok = PyDict_SetItem(attrib, key, value);
3267 Py_DECREF(value);
3268 Py_DECREF(key);
3269 if (ok < 0) {
3270 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003271 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 return;
3273 }
3274 attrib_in += 2;
3275 }
3276 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003277 Py_INCREF(Py_None);
3278 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003279 }
3280
3281 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 /* shortcut */
3283 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3284 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003285 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003286 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003287 if (attrib == Py_None) {
3288 Py_DECREF(attrib);
3289 attrib = PyDict_New();
3290 if (!attrib) {
3291 Py_DECREF(tag);
3292 return;
3293 }
3294 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003295 res = PyObject_CallFunctionObjArgs(self->handle_start,
3296 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003297 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 res = NULL;
3299
3300 Py_DECREF(tag);
3301 Py_DECREF(attrib);
3302
3303 Py_XDECREF(res);
3304}
3305
3306static void
3307expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3308 int data_len)
3309{
3310 PyObject* data;
3311 PyObject* res;
3312
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003313 if (PyErr_Occurred())
3314 return;
3315
Neal Norwitz0269b912007-08-08 06:56:02 +00003316 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003317 if (!data)
3318 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319
3320 if (TreeBuilder_CheckExact(self->target))
3321 /* shortcut */
3322 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3323 else if (self->handle_data)
Stefan Behnel43851a22019-05-01 21:20:38 +02003324 res = _PyObject_FastCall(self->handle_data, &data, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003325 else
3326 res = NULL;
3327
3328 Py_DECREF(data);
3329
3330 Py_XDECREF(res);
3331}
3332
3333static void
3334expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3335{
3336 PyObject* tag;
3337 PyObject* res = NULL;
3338
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003339 if (PyErr_Occurred())
3340 return;
3341
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342 if (TreeBuilder_CheckExact(self->target))
3343 /* shortcut */
3344 /* the standard tree builder doesn't look at the end tag */
3345 res = treebuilder_handle_end(
3346 (TreeBuilderObject*) self->target, Py_None
3347 );
3348 else if (self->handle_end) {
3349 tag = makeuniversal(self, tag_in);
3350 if (tag) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003351 res = _PyObject_FastCall(self->handle_end, &tag, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352 Py_DECREF(tag);
3353 }
3354 }
3355
3356 Py_XDECREF(res);
3357}
3358
3359static void
3360expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3361 const XML_Char *uri)
3362{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003363 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3364 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003365
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003366 if (PyErr_Occurred())
3367 return;
3368
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003369 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003370 return;
3371
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003372 if (!uri)
3373 uri = "";
3374 if (!prefix)
3375 prefix = "";
3376
3377 parcel = Py_BuildValue("ss", prefix, uri);
3378 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003379 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003380 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3381 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003382}
3383
3384static void
3385expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3386{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003387 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3388
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003389 if (PyErr_Occurred())
3390 return;
3391
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003392 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003393 return;
3394
3395 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003396}
3397
3398static void
3399expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3400{
Stefan Behnel43851a22019-05-01 21:20:38 +02003401 PyObject* comment = NULL;
3402 PyObject* res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003404 if (PyErr_Occurred())
3405 return;
3406
Stefan Behnel43851a22019-05-01 21:20:38 +02003407 if (TreeBuilder_CheckExact(self->target)) {
3408 /* shortcut */
3409 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3410
Neal Norwitz0269b912007-08-08 06:56:02 +00003411 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003412 if (!comment)
3413 return; /* parser will look for errors */
3414
3415 res = treebuilder_handle_comment(target, comment);
3416 } else if (self->handle_comment) {
3417 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3418 if (!comment)
3419 return;
3420
3421 res = _PyObject_FastCall(self->handle_comment, &comment, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003423
3424 Py_XDECREF(res);
3425 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003426}
3427
Eli Bendersky45839902013-01-13 05:14:47 -08003428static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003429expat_start_doctype_handler(XMLParserObject *self,
3430 const XML_Char *doctype_name,
3431 const XML_Char *sysid,
3432 const XML_Char *pubid,
3433 int has_internal_subset)
3434{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003435 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003436 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003437 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003438
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003439 if (PyErr_Occurred())
3440 return;
3441
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003442 doctype_name_obj = makeuniversal(self, doctype_name);
3443 if (!doctype_name_obj)
3444 return;
3445
3446 if (sysid) {
3447 sysid_obj = makeuniversal(self, sysid);
3448 if (!sysid_obj) {
3449 Py_DECREF(doctype_name_obj);
3450 return;
3451 }
3452 } else {
3453 Py_INCREF(Py_None);
3454 sysid_obj = Py_None;
3455 }
3456
3457 if (pubid) {
3458 pubid_obj = makeuniversal(self, pubid);
3459 if (!pubid_obj) {
3460 Py_DECREF(doctype_name_obj);
3461 Py_DECREF(sysid_obj);
3462 return;
3463 }
3464 } else {
3465 Py_INCREF(Py_None);
3466 pubid_obj = Py_None;
3467 }
3468
3469 /* If the target has a handler for doctype, call it. */
3470 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003471 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3472 doctype_name_obj, pubid_obj,
3473 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003474 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003475 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003476 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3477 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3478 "The doctype() method of XMLParser is ignored. "
3479 "Define doctype() method on the TreeBuilder target.",
3480 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003481 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003482 }
3483
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003484 Py_DECREF(doctype_name_obj);
3485 Py_DECREF(pubid_obj);
3486 Py_DECREF(sysid_obj);
3487}
3488
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003489static void
3490expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3491 const XML_Char* data_in)
3492{
Stefan Behnel43851a22019-05-01 21:20:38 +02003493 PyObject* pi_target = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494 PyObject* data;
3495 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003496 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003497
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003498 if (PyErr_Occurred())
3499 return;
3500
Stefan Behnel43851a22019-05-01 21:20:38 +02003501 if (TreeBuilder_CheckExact(self->target)) {
3502 /* shortcut */
3503 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3504
3505 if (target->events_append && target->pi_event_obj) {
3506 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3507 if (!pi_target)
3508 goto error;
3509 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3510 if (!data)
3511 goto error;
3512 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003513 Py_XDECREF(res);
3514 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003515 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003516 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003517 } else if (self->handle_pi) {
3518 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3519 if (!pi_target)
3520 goto error;
3521 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3522 if (!data)
3523 goto error;
3524
3525 stack[0] = pi_target;
3526 stack[1] = data;
3527 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3528 Py_XDECREF(res);
3529 Py_DECREF(data);
3530 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003531 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003532
3533 return;
3534
3535 error:
3536 Py_XDECREF(pi_target);
3537 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003538}
3539
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003541
Eli Bendersky52467b12012-06-01 07:13:08 +03003542static PyObject *
3543xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003544{
Eli Bendersky52467b12012-06-01 07:13:08 +03003545 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3546 if (self) {
3547 self->parser = NULL;
3548 self->target = self->entity = self->names = NULL;
3549 self->handle_start = self->handle_data = self->handle_end = NULL;
3550 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003551 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003553 return (PyObject *)self;
3554}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555
scoderc8d8e152017-09-14 22:00:03 +02003556static int
3557ignore_attribute_error(PyObject *value)
3558{
3559 if (value == NULL) {
3560 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3561 return -1;
3562 }
3563 PyErr_Clear();
3564 }
3565 return 0;
3566}
3567
Serhiy Storchakacb985562015-05-04 15:32:48 +03003568/*[clinic input]
3569_elementtree.XMLParser.__init__
3570
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003571 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003572 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003573 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003574
3575[clinic start generated code]*/
3576
Eli Bendersky52467b12012-06-01 07:13:08 +03003577static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003578_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3579 const char *encoding)
3580/*[clinic end generated code: output=3ae45ec6cdf344e4 input=96288fcba916cfce]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003581{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003582 self->entity = PyDict_New();
3583 if (!self->entity)
3584 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585
Serhiy Storchakacb985562015-05-04 15:32:48 +03003586 self->names = PyDict_New();
3587 if (!self->names) {
3588 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003589 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003590 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003591
Serhiy Storchakacb985562015-05-04 15:32:48 +03003592 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3593 if (!self->parser) {
3594 Py_CLEAR(self->entity);
3595 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003596 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003597 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003599 /* expat < 2.1.0 has no XML_SetHashSalt() */
3600 if (EXPAT(SetHashSalt) != NULL) {
3601 EXPAT(SetHashSalt)(self->parser,
3602 (unsigned long)_Py_HashSecret.expat.hashsalt);
3603 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
Eli Bendersky52467b12012-06-01 07:13:08 +03003605 if (target) {
3606 Py_INCREF(target);
3607 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003608 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003610 Py_CLEAR(self->entity);
3611 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003612 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003614 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003615 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616
Serhiy Storchakacb985562015-05-04 15:32:48 +03003617 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003618 if (ignore_attribute_error(self->handle_start)) {
3619 return -1;
3620 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003621 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003622 if (ignore_attribute_error(self->handle_data)) {
3623 return -1;
3624 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003625 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003626 if (ignore_attribute_error(self->handle_end)) {
3627 return -1;
3628 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003629 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003630 if (ignore_attribute_error(self->handle_comment)) {
3631 return -1;
3632 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003633 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003634 if (ignore_attribute_error(self->handle_pi)) {
3635 return -1;
3636 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003637 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003638 if (ignore_attribute_error(self->handle_close)) {
3639 return -1;
3640 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003641 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003642 if (ignore_attribute_error(self->handle_doctype)) {
3643 return -1;
3644 }
Eli Bendersky45839902013-01-13 05:14:47 -08003645
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003647 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003649 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003650 (XML_StartElementHandler) expat_start_handler,
3651 (XML_EndElementHandler) expat_end_handler
3652 );
3653 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003654 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655 (XML_DefaultHandler) expat_default_handler
3656 );
3657 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003658 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659 (XML_CharacterDataHandler) expat_data_handler
3660 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003661 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003663 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664 (XML_CommentHandler) expat_comment_handler
3665 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003666 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003668 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669 (XML_ProcessingInstructionHandler) expat_pi_handler
3670 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003671 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003672 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003673 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3674 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003676 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003677 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679
Eli Bendersky52467b12012-06-01 07:13:08 +03003680 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681}
3682
Eli Bendersky52467b12012-06-01 07:13:08 +03003683static int
3684xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3685{
3686 Py_VISIT(self->handle_close);
3687 Py_VISIT(self->handle_pi);
3688 Py_VISIT(self->handle_comment);
3689 Py_VISIT(self->handle_end);
3690 Py_VISIT(self->handle_data);
3691 Py_VISIT(self->handle_start);
3692
3693 Py_VISIT(self->target);
3694 Py_VISIT(self->entity);
3695 Py_VISIT(self->names);
3696
3697 return 0;
3698}
3699
3700static int
3701xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003702{
Victor Stinnere727d412017-09-18 05:29:37 -07003703 if (self->parser != NULL) {
3704 XML_Parser parser = self->parser;
3705 self->parser = NULL;
3706 EXPAT(ParserFree)(parser);
3707 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003708
Antoine Pitrouc1948842012-10-01 23:40:37 +02003709 Py_CLEAR(self->handle_close);
3710 Py_CLEAR(self->handle_pi);
3711 Py_CLEAR(self->handle_comment);
3712 Py_CLEAR(self->handle_end);
3713 Py_CLEAR(self->handle_data);
3714 Py_CLEAR(self->handle_start);
3715 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003716
Antoine Pitrouc1948842012-10-01 23:40:37 +02003717 Py_CLEAR(self->target);
3718 Py_CLEAR(self->entity);
3719 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003720
Eli Bendersky52467b12012-06-01 07:13:08 +03003721 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722}
3723
Eli Bendersky52467b12012-06-01 07:13:08 +03003724static void
3725xmlparser_dealloc(XMLParserObject* self)
3726{
3727 PyObject_GC_UnTrack(self);
3728 xmlparser_gc_clear(self);
3729 Py_TYPE(self)->tp_free((PyObject *)self);
3730}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003731
3732LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003733expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734{
3735 int ok;
3736
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003737 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3739
3740 if (PyErr_Occurred())
3741 return NULL;
3742
3743 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003744 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003745 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003746 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003747 EXPAT(GetErrorColumnNumber)(self->parser),
3748 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003749 );
3750 return NULL;
3751 }
3752
3753 Py_RETURN_NONE;
3754}
3755
Serhiy Storchakacb985562015-05-04 15:32:48 +03003756/*[clinic input]
3757_elementtree.XMLParser.close
3758
3759[clinic start generated code]*/
3760
3761static PyObject *
3762_elementtree_XMLParser_close_impl(XMLParserObject *self)
3763/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003764{
3765 /* end feeding data to parser */
3766
3767 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003768 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003769 if (!res)
3770 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003771
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003772 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003773 Py_DECREF(res);
3774 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003775 }
3776 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003777 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003778 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003779 }
3780 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003781 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003782 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003783}
3784
Serhiy Storchakacb985562015-05-04 15:32:48 +03003785/*[clinic input]
3786_elementtree.XMLParser.feed
3787
3788 data: object
3789 /
3790
3791[clinic start generated code]*/
3792
3793static PyObject *
3794_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3795/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003796{
3797 /* feed data to parser */
3798
Serhiy Storchakacb985562015-05-04 15:32:48 +03003799 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003800 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003801 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3802 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003803 return NULL;
3804 if (data_len > INT_MAX) {
3805 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3806 return NULL;
3807 }
3808 /* Explicitly set UTF-8 encoding. Return code ignored. */
3809 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003810 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003811 }
3812 else {
3813 Py_buffer view;
3814 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003815 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003816 return NULL;
3817 if (view.len > INT_MAX) {
3818 PyBuffer_Release(&view);
3819 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3820 return NULL;
3821 }
3822 res = expat_parse(self, view.buf, (int)view.len, 0);
3823 PyBuffer_Release(&view);
3824 return res;
3825 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003826}
3827
Serhiy Storchakacb985562015-05-04 15:32:48 +03003828/*[clinic input]
3829_elementtree.XMLParser._parse_whole
3830
3831 file: object
3832 /
3833
3834[clinic start generated code]*/
3835
3836static PyObject *
3837_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3838/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003839{
Eli Benderskya3699232013-05-19 18:47:23 -07003840 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003841 PyObject* reader;
3842 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003843 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003844 PyObject* res;
3845
Serhiy Storchakacb985562015-05-04 15:32:48 +03003846 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003847 if (!reader)
3848 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003849
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003850 /* read from open file object */
3851 for (;;) {
3852
3853 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3854
3855 if (!buffer) {
3856 /* read failed (e.g. due to KeyboardInterrupt) */
3857 Py_DECREF(reader);
3858 return NULL;
3859 }
3860
Eli Benderskyf996e772012-03-16 05:53:30 +02003861 if (PyUnicode_CheckExact(buffer)) {
3862 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003863 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003864 Py_DECREF(buffer);
3865 break;
3866 }
3867 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003868 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003869 if (!temp) {
3870 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003871 Py_DECREF(reader);
3872 return NULL;
3873 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003874 buffer = temp;
3875 }
3876 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003877 Py_DECREF(buffer);
3878 break;
3879 }
3880
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003881 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3882 Py_DECREF(buffer);
3883 Py_DECREF(reader);
3884 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3885 return NULL;
3886 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003887 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003888 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003889 );
3890
3891 Py_DECREF(buffer);
3892
3893 if (!res) {
3894 Py_DECREF(reader);
3895 return NULL;
3896 }
3897 Py_DECREF(res);
3898
3899 }
3900
3901 Py_DECREF(reader);
3902
3903 res = expat_parse(self, "", 0, 1);
3904
3905 if (res && TreeBuilder_CheckExact(self->target)) {
3906 Py_DECREF(res);
3907 return treebuilder_done((TreeBuilderObject*) self->target);
3908 }
3909
3910 return res;
3911}
3912
Serhiy Storchakacb985562015-05-04 15:32:48 +03003913/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03003914_elementtree.XMLParser._setevents
3915
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003916 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003917 events_to_report: object = None
3918 /
3919
3920[clinic start generated code]*/
3921
3922static PyObject *
3923_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3924 PyObject *events_queue,
3925 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003926/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003927{
3928 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003929 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003930 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003931 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003932
3933 if (!TreeBuilder_CheckExact(self->target)) {
3934 PyErr_SetString(
3935 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003936 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003937 "targets"
3938 );
3939 return NULL;
3940 }
3941
3942 target = (TreeBuilderObject*) self->target;
3943
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003944 events_append = PyObject_GetAttrString(events_queue, "append");
3945 if (events_append == NULL)
3946 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003947 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003948
3949 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003950 Py_CLEAR(target->start_event_obj);
3951 Py_CLEAR(target->end_event_obj);
3952 Py_CLEAR(target->start_ns_event_obj);
3953 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02003954 Py_CLEAR(target->comment_event_obj);
3955 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003956
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003957 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003958 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003959 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003960 Py_RETURN_NONE;
3961 }
3962
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003963 if (!(events_seq = PySequence_Fast(events_to_report,
3964 "events must be a sequence"))) {
3965 return NULL;
3966 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003967
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003968 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003969 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003970 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003971 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003972 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003973 } else if (PyBytes_Check(event_name_obj)) {
3974 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003975 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003976 if (event_name == NULL) {
3977 Py_DECREF(events_seq);
3978 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3979 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003980 }
3981
3982 Py_INCREF(event_name_obj);
3983 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003984 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003985 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003986 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003987 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003988 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003989 EXPAT(SetNamespaceDeclHandler)(
3990 self->parser,
3991 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3992 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3993 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003994 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003995 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003996 EXPAT(SetNamespaceDeclHandler)(
3997 self->parser,
3998 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3999 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4000 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004001 } else if (strcmp(event_name, "comment") == 0) {
4002 Py_XSETREF(target->comment_event_obj, event_name_obj);
4003 EXPAT(SetCommentHandler)(
4004 self->parser,
4005 (XML_CommentHandler) expat_comment_handler
4006 );
4007 } else if (strcmp(event_name, "pi") == 0) {
4008 Py_XSETREF(target->pi_event_obj, event_name_obj);
4009 EXPAT(SetProcessingInstructionHandler)(
4010 self->parser,
4011 (XML_ProcessingInstructionHandler) expat_pi_handler
4012 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004013 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004014 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004015 Py_DECREF(events_seq);
4016 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004017 return NULL;
4018 }
4019 }
4020
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004021 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004022 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004023}
4024
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004025static PyMemberDef xmlparser_members[] = {
4026 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4027 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4028 {NULL}
4029};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004030
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004031static PyObject*
4032xmlparser_version_getter(XMLParserObject *self, void *closure)
4033{
4034 return PyUnicode_FromFormat(
4035 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4036 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004037}
4038
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004039static PyGetSetDef xmlparser_getsetlist[] = {
4040 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4041 {NULL},
4042};
4043
Serhiy Storchakacb985562015-05-04 15:32:48 +03004044#include "clinic/_elementtree.c.h"
4045
4046static PyMethodDef element_methods[] = {
4047
4048 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4049
4050 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4051 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4052
4053 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4054 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4055 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4056
4057 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4058 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4059 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4060 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4061
4062 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4063 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4064 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4065
Serhiy Storchaka762ec972017-03-30 18:12:06 +03004066 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004067 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
4068
4069 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4070 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4071
4072 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4073
4074 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4075 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4076 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4077 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4078 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4079
4080 {NULL, NULL}
4081};
4082
4083static PyMappingMethods element_as_mapping = {
4084 (lenfunc) element_length,
4085 (binaryfunc) element_subscr,
4086 (objobjargproc) element_ass_subscr,
4087};
4088
Serhiy Storchakadde08152015-11-25 15:28:13 +02004089static PyGetSetDef element_getsetlist[] = {
4090 {"tag",
4091 (getter)element_tag_getter,
4092 (setter)element_tag_setter,
4093 "A string identifying what kind of data this element represents"},
4094 {"text",
4095 (getter)element_text_getter,
4096 (setter)element_text_setter,
4097 "A string of text directly after the start tag, or None"},
4098 {"tail",
4099 (getter)element_tail_getter,
4100 (setter)element_tail_setter,
4101 "A string of text directly after the end tag, or None"},
4102 {"attrib",
4103 (getter)element_attrib_getter,
4104 (setter)element_attrib_setter,
4105 "A dictionary containing the element's attributes"},
4106 {NULL},
4107};
4108
Serhiy Storchakacb985562015-05-04 15:32:48 +03004109static PyTypeObject Element_Type = {
4110 PyVarObject_HEAD_INIT(NULL, 0)
4111 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4112 /* methods */
4113 (destructor)element_dealloc, /* tp_dealloc */
4114 0, /* tp_print */
4115 0, /* tp_getattr */
4116 0, /* tp_setattr */
4117 0, /* tp_reserved */
4118 (reprfunc)element_repr, /* tp_repr */
4119 0, /* tp_as_number */
4120 &element_as_sequence, /* tp_as_sequence */
4121 &element_as_mapping, /* tp_as_mapping */
4122 0, /* tp_hash */
4123 0, /* tp_call */
4124 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004125 PyObject_GenericGetAttr, /* tp_getattro */
4126 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004127 0, /* tp_as_buffer */
4128 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4129 /* tp_flags */
4130 0, /* tp_doc */
4131 (traverseproc)element_gc_traverse, /* tp_traverse */
4132 (inquiry)element_gc_clear, /* tp_clear */
4133 0, /* tp_richcompare */
4134 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4135 0, /* tp_iter */
4136 0, /* tp_iternext */
4137 element_methods, /* tp_methods */
4138 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004139 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004140 0, /* tp_base */
4141 0, /* tp_dict */
4142 0, /* tp_descr_get */
4143 0, /* tp_descr_set */
4144 0, /* tp_dictoffset */
4145 (initproc)element_init, /* tp_init */
4146 PyType_GenericAlloc, /* tp_alloc */
4147 element_new, /* tp_new */
4148 0, /* tp_free */
4149};
4150
4151static PyMethodDef treebuilder_methods[] = {
4152 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4153 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4154 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004155 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4156 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004157 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4158 {NULL, NULL}
4159};
4160
4161static PyTypeObject TreeBuilder_Type = {
4162 PyVarObject_HEAD_INIT(NULL, 0)
4163 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4164 /* methods */
4165 (destructor)treebuilder_dealloc, /* tp_dealloc */
4166 0, /* tp_print */
4167 0, /* tp_getattr */
4168 0, /* tp_setattr */
4169 0, /* tp_reserved */
4170 0, /* tp_repr */
4171 0, /* tp_as_number */
4172 0, /* tp_as_sequence */
4173 0, /* tp_as_mapping */
4174 0, /* tp_hash */
4175 0, /* tp_call */
4176 0, /* tp_str */
4177 0, /* tp_getattro */
4178 0, /* tp_setattro */
4179 0, /* tp_as_buffer */
4180 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4181 /* tp_flags */
4182 0, /* tp_doc */
4183 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4184 (inquiry)treebuilder_gc_clear, /* tp_clear */
4185 0, /* tp_richcompare */
4186 0, /* tp_weaklistoffset */
4187 0, /* tp_iter */
4188 0, /* tp_iternext */
4189 treebuilder_methods, /* tp_methods */
4190 0, /* tp_members */
4191 0, /* tp_getset */
4192 0, /* tp_base */
4193 0, /* tp_dict */
4194 0, /* tp_descr_get */
4195 0, /* tp_descr_set */
4196 0, /* tp_dictoffset */
4197 _elementtree_TreeBuilder___init__, /* tp_init */
4198 PyType_GenericAlloc, /* tp_alloc */
4199 treebuilder_new, /* tp_new */
4200 0, /* tp_free */
4201};
4202
4203static PyMethodDef xmlparser_methods[] = {
4204 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4205 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4206 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4207 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004208 {NULL, NULL}
4209};
4210
Neal Norwitz227b5332006-03-22 09:28:35 +00004211static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004212 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004213 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004214 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004215 (destructor)xmlparser_dealloc, /* tp_dealloc */
4216 0, /* tp_print */
4217 0, /* tp_getattr */
4218 0, /* tp_setattr */
4219 0, /* tp_reserved */
4220 0, /* tp_repr */
4221 0, /* tp_as_number */
4222 0, /* tp_as_sequence */
4223 0, /* tp_as_mapping */
4224 0, /* tp_hash */
4225 0, /* tp_call */
4226 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004227 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004228 0, /* tp_setattro */
4229 0, /* tp_as_buffer */
4230 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4231 /* tp_flags */
4232 0, /* tp_doc */
4233 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4234 (inquiry)xmlparser_gc_clear, /* tp_clear */
4235 0, /* tp_richcompare */
4236 0, /* tp_weaklistoffset */
4237 0, /* tp_iter */
4238 0, /* tp_iternext */
4239 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004240 xmlparser_members, /* tp_members */
4241 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004242 0, /* tp_base */
4243 0, /* tp_dict */
4244 0, /* tp_descr_get */
4245 0, /* tp_descr_set */
4246 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004247 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004248 PyType_GenericAlloc, /* tp_alloc */
4249 xmlparser_new, /* tp_new */
4250 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004251};
4252
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004253/* ==================================================================== */
4254/* python module interface */
4255
4256static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004257 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004258 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004259 {NULL, NULL}
4260};
4261
Martin v. Löwis1a214512008-06-11 05:26:20 +00004262
Eli Bendersky532d03e2013-08-10 08:00:39 -07004263static struct PyModuleDef elementtreemodule = {
4264 PyModuleDef_HEAD_INIT,
4265 "_elementtree",
4266 NULL,
4267 sizeof(elementtreestate),
4268 _functions,
4269 NULL,
4270 elementtree_traverse,
4271 elementtree_clear,
4272 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004273};
4274
Neal Norwitzf6657e62006-12-28 04:47:50 +00004275PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004276PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004277{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004278 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004279 elementtreestate *st;
4280
4281 m = PyState_FindModule(&elementtreemodule);
4282 if (m) {
4283 Py_INCREF(m);
4284 return m;
4285 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004286
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004287 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004288 if (PyType_Ready(&ElementIter_Type) < 0)
4289 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004290 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004291 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004292 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004293 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004294 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004295 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004296
Eli Bendersky532d03e2013-08-10 08:00:39 -07004297 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004298 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004299 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004300 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004301
Eli Bendersky828efde2012-04-05 05:40:58 +03004302 if (!(temp = PyImport_ImportModule("copy")))
4303 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004304 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004305 Py_XDECREF(temp);
4306
Victor Stinnerb136f112017-07-10 22:28:02 +02004307 if (st->deepcopy_obj == NULL) {
4308 return NULL;
4309 }
4310
4311 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004312 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004313 return NULL;
4314
Eli Bendersky20d41742012-06-01 09:48:37 +03004315 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004316 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4317 if (expat_capi) {
4318 /* check that it's usable */
4319 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004320 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004321 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4322 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004323 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004324 PyErr_SetString(PyExc_ImportError,
4325 "pyexpat version is incompatible");
4326 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004327 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004328 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004329 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004330 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004331
Eli Bendersky532d03e2013-08-10 08:00:39 -07004332 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004333 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004334 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004335 Py_INCREF(st->parseerror_obj);
4336 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004337
Eli Bendersky092af1f2012-03-04 07:14:03 +02004338 Py_INCREF((PyObject *)&Element_Type);
4339 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4340
Eli Bendersky58d548d2012-05-29 15:45:16 +03004341 Py_INCREF((PyObject *)&TreeBuilder_Type);
4342 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4343
Eli Bendersky52467b12012-06-01 07:13:08 +03004344 Py_INCREF((PyObject *)&XMLParser_Type);
4345 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004346
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004347 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004348}