blob: 2c92a8aedb5a8841a9354defc38a9a97882888e1 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020017#include "structmember.h" // PyMemberDef
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
38#if 0
39static int memory = 0;
40#define ALLOC(size, comment)\
41do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
42#define RELEASE(size, comment)\
43do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
44#else
45#define ALLOC(size, comment)
46#define RELEASE(size, comment)
47#endif
48
49/* compiler tweaks */
50#if defined(_MSC_VER)
51#define LOCAL(type) static __inline type __fastcall
52#else
53#define LOCAL(type) static type
54#endif
55
/* Helpers that keep a one-bit 'join' flag in the lowest bit of a string
   object pointer.  Whenever text or tail is used as an object pointer it
   must first be passed through JOIN_OBJ; the comments in the ElementObject
   definition describe what the flag means.

   JOIN_GET(p)        -> the flag bit stored in p (0 or 1)
   JOIN_SET(p, flag)  -> p with its flag bit replaced by flag
   JOIN_OBJ(p)        -> p with the flag bit masked off, as a PyObject* */
#define JOIN_GET(p) ((uintptr_t)(p) & 1)
#define JOIN_SET(p, flag) ((void *)((uintptr_t)(JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject *)((uintptr_t)(p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +020095 PyObject *comment_factory;
96 PyObject *pi_factory;
Eli Bendersky532d03e2013-08-10 08:00:39 -070097} elementtreestate;
98
99static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
Hai Shif707d942020-03-16 21:15:01 +0800104static inline elementtreestate*
105get_elementtree_state(PyObject *module)
106{
107 void *state = PyModule_GetState(module);
108 assert(state != NULL);
109 return (elementtreestate *)state;
110}
Eli Bendersky532d03e2013-08-10 08:00:39 -0700111
112/* Find the module instance imported in the currently running sub-interpreter
113 * and get its state.
114 */
115#define ET_STATE_GLOBAL \
116 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
117
118static int
119elementtree_clear(PyObject *m)
120{
Hai Shif707d942020-03-16 21:15:01 +0800121 elementtreestate *st = get_elementtree_state(m);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700122 Py_CLEAR(st->parseerror_obj);
123 Py_CLEAR(st->deepcopy_obj);
124 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200125 Py_CLEAR(st->comment_factory);
126 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700127 return 0;
128}
129
130static int
131elementtree_traverse(PyObject *m, visitproc visit, void *arg)
132{
Hai Shif707d942020-03-16 21:15:01 +0800133 elementtreestate *st = get_elementtree_state(m);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700134 Py_VISIT(st->parseerror_obj);
135 Py_VISIT(st->deepcopy_obj);
136 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200137 Py_VISIT(st->comment_factory);
138 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 return 0;
140}
141
142static void
143elementtree_free(void *m)
144{
145 elementtree_clear((PyObject *)m);
146}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000147
148/* helpers */
149
150LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151list_join(PyObject* list)
152{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300153 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 PyObject* result;
156
Antoine Pitrouc1948842012-10-01 23:40:37 +0200157 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 if (!joiner)
159 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162 return result;
163}
164
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165/* Is the given object an empty dictionary?
166*/
167static int
168is_empty_dict(PyObject *obj)
169{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200170 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300171}
172
173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200175/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000176
177typedef struct {
178
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200179 /* attributes (a dictionary object), or NULL if no attributes */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyObject* attrib;
181
182 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200183 Py_ssize_t length; /* actual number of items */
184 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185
186 /* this either points to _children or to a malloced buffer */
187 PyObject* *children;
188
189 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100190
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000191} ElementObjectExtra;
192
193typedef struct {
194 PyObject_HEAD
195
196 /* element tag (a string). */
197 PyObject* tag;
198
199 /* text before first child. note that this is a tagged pointer;
200 use JOIN_OBJ to get the object pointer. the join flag is used
201 to distinguish lists created by the tree builder from lists
202 assigned to the attribute by application code; the former
203 should be joined before being returned to the user, the latter
204 should be left intact. */
205 PyObject* text;
206
207 /* text after this element, in parent. note that this is a tagged
208 pointer; use JOIN_OBJ to get the object pointer. */
209 PyObject* tail;
210
211 ElementObjectExtra* extra;
212
Eli Benderskyebf37a22012-04-03 22:02:37 +0300213 PyObject *weakreflist; /* For tp_weaklistoffset */
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215} ElementObject;
216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217
Dong-hee Na1b55b652020-02-17 19:09:15 +0900218#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
Serhiy Storchakab11c5662018-10-14 10:32:19 +0300219#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
220
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200223/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227{
228 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200229 if (!self->extra) {
230 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000231 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200232 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000233
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200234 Py_XINCREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235 self->extra->attrib = attrib;
236
237 self->extra->length = 0;
238 self->extra->allocated = STATIC_CHILDREN;
239 self->extra->children = self->extra->_children;
240
241 return 0;
242}
243
244LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300245dealloc_extra(ElementObjectExtra *extra)
246{
247 Py_ssize_t i;
248
249 if (!extra)
250 return;
251
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200252 Py_XDECREF(extra->attrib);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300253
254 for (i = 0; i < extra->length; i++)
255 Py_DECREF(extra->children[i]);
256
257 if (extra->children != extra->_children)
258 PyObject_Free(extra->children);
259
260 PyObject_Free(extra);
261}
262
263LOCAL(void)
264clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000265{
Eli Bendersky08b85292012-04-04 15:55:07 +0300266 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 if (!self->extra)
269 return;
270
271 /* Avoid DECREFs calling into this code again (cycles, etc.)
272 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300273 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 self->extra = NULL;
275
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300276 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000277}
278
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279/* Convenience internal function to create new Element objects with the given
280 * tag and attributes.
281*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200283create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284{
285 ElementObject* self;
286
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 if (self == NULL)
289 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 self->extra = NULL;
291
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292 Py_INCREF(tag);
293 self->tag = tag;
294
295 Py_INCREF(Py_None);
296 self->text = Py_None;
297
298 Py_INCREF(Py_None);
299 self->tail = Py_None;
300
Eli Benderskyebf37a22012-04-03 22:02:37 +0300301 self->weakreflist = NULL;
302
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200303 ALLOC(sizeof(ElementObject), "create element");
304 PyObject_GC_Track(self);
305
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200306 if (attrib != NULL && !is_empty_dict(attrib)) {
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200308 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200309 return NULL;
310 }
311 }
312
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300331 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200332 }
333 return (PyObject *)e;
334}
335
Eli Bendersky737b1732012-05-29 06:02:56 +0300336/* Helper function for extracting the attrib dictionary from a keywords dict.
337 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800338 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700340 *
341 * Return a dictionary with the content of kwds merged into the content of
342 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300343 */
344static PyObject*
345get_attrib_from_keywords(PyObject *kwds)
346{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700347 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600348 if (attrib_str == NULL) {
349 return NULL;
350 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200351 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352
353 if (attrib) {
354 /* If attrib was found in kwds, copy its value and remove it from
355 * kwds
356 */
357 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700358 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300359 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
360 Py_TYPE(attrib)->tp_name);
361 return NULL;
362 }
363 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200364 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
365 Py_DECREF(attrib);
366 attrib = NULL;
367 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200368 }
369 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 attrib = PyDict_New();
371 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700372
373 Py_DECREF(attrib_str);
374
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600375 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
376 Py_DECREF(attrib);
377 return NULL;
378 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300379 return attrib;
380}
381
Serhiy Storchakacb985562015-05-04 15:32:48 +0300382/*[clinic input]
383module _elementtree
384class _elementtree.Element "ElementObject *" "&Element_Type"
385class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
386class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
387[clinic start generated code]*/
388/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
389
Eli Bendersky092af1f2012-03-04 07:14:03 +0200390static int
391element_init(PyObject *self, PyObject *args, PyObject *kwds)
392{
393 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 PyObject *attrib = NULL;
395 ElementObject *self_elem;
396
397 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
398 return -1;
399
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 if (attrib) {
401 /* attrib passed as positional arg */
402 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (!attrib)
404 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 if (kwds) {
406 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300408 return -1;
409 }
410 }
411 } else if (kwds) {
412 /* have keywords args */
413 attrib = get_attrib_from_keywords(kwds);
414 if (!attrib)
415 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 }
417
418 self_elem = (ElementObject *)self;
419
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200422 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423 return -1;
424 }
425 }
426
Eli Bendersky48d358b2012-05-30 17:57:50 +0300427 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200428 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429
430 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300432 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
Eli Bendersky092af1f2012-03-04 07:14:03 +0200437 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300438 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200439
440 return 0;
441}
442
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200446 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 PyObject* *children;
448
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300449 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000450 /* make sure self->children can hold the given number of extra
451 elements. set an exception and return -1 if allocation failed */
452
Victor Stinner5f0af232013-07-11 23:01:36 +0200453 if (!self->extra) {
454 if (create_extra(self, NULL) < 0)
455 return -1;
456 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000457
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200458 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000459
460 if (size > self->extra->allocated) {
461 /* use Python 2.4's list growth strategy */
462 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100464 * which needs at least 4 bytes.
465 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000466 * be safe.
467 */
468 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200469 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
470 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000471 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000472 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100473 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000474 * false alarm always assume at least one child to be safe.
475 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000476 children = PyObject_Realloc(self->extra->children,
477 size * sizeof(PyObject*));
478 if (!children)
479 goto nomemory;
480 } else {
481 children = PyObject_Malloc(size * sizeof(PyObject*));
482 if (!children)
483 goto nomemory;
484 /* copy existing children from static area to malloc buffer */
485 memcpy(children, self->extra->children,
486 self->extra->length * sizeof(PyObject*));
487 }
488 self->extra->children = children;
489 self->extra->allocated = size;
490 }
491
492 return 0;
493
494 nomemory:
495 PyErr_NoMemory();
496 return -1;
497}
498
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300499LOCAL(void)
500raise_type_error(PyObject *element)
501{
502 PyErr_Format(PyExc_TypeError,
503 "expected an Element, not \"%.200s\"",
504 Py_TYPE(element)->tp_name);
505}
506
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000507LOCAL(int)
508element_add_subelement(ElementObject* self, PyObject* element)
509{
510 /* add a child element to a parent */
511
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300512 if (!Element_Check(element)) {
513 raise_type_error(element);
514 return -1;
515 }
516
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000517 if (element_resize(self, 1) < 0)
518 return -1;
519
520 Py_INCREF(element);
521 self->extra->children[self->extra->length] = element;
522
523 self->extra->length++;
524
525 return 0;
526}
527
528LOCAL(PyObject*)
529element_get_attrib(ElementObject* self)
530{
531 /* return borrowed reference to attrib dictionary */
532 /* note: this function assumes that the extra section exists */
533
534 PyObject* res = self->extra->attrib;
535
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200536 if (!res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537 /* create missing dictionary */
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200538 res = self->extra->attrib = PyDict_New();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 }
540
541 return res;
542}
543
544LOCAL(PyObject*)
545element_get_text(ElementObject* self)
546{
547 /* return borrowed reference to text attribute */
548
Serhiy Storchaka576def02017-03-30 09:47:31 +0300549 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550
551 if (JOIN_GET(res)) {
552 res = JOIN_OBJ(res);
553 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300554 PyObject *tmp = list_join(res);
555 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000556 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300557 self->text = tmp;
558 Py_DECREF(res);
559 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000560 }
561 }
562
563 return res;
564}
565
566LOCAL(PyObject*)
567element_get_tail(ElementObject* self)
568{
569 /* return borrowed reference to text attribute */
570
Serhiy Storchaka576def02017-03-30 09:47:31 +0300571 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000572
573 if (JOIN_GET(res)) {
574 res = JOIN_OBJ(res);
575 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300576 PyObject *tmp = list_join(res);
577 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300579 self->tail = tmp;
580 Py_DECREF(res);
581 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 }
583 }
584
585 return res;
586}
587
588static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300589subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590{
591 PyObject* elem;
592
593 ElementObject* parent;
594 PyObject* tag;
595 PyObject* attrib = NULL;
596 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
597 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800598 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800600 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 if (attrib) {
603 /* attrib passed as positional arg */
604 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000605 if (!attrib)
606 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600607 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
608 Py_DECREF(attrib);
609 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300610 }
611 } else if (kwds) {
612 /* have keyword args */
613 attrib = get_attrib_from_keywords(kwds);
614 if (!attrib)
615 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300617 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618 }
619
Eli Bendersky092af1f2012-03-04 07:14:03 +0200620 elem = create_new_element(tag, attrib);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200621 Py_XDECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200622 if (elem == NULL)
623 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000625 if (element_add_subelement(parent, elem) < 0) {
626 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000629
630 return elem;
631}
632
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633static int
634element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
635{
636 Py_VISIT(self->tag);
637 Py_VISIT(JOIN_OBJ(self->text));
638 Py_VISIT(JOIN_OBJ(self->tail));
639
640 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200641 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 Py_VISIT(self->extra->attrib);
643
644 for (i = 0; i < self->extra->length; ++i)
645 Py_VISIT(self->extra->children[i]);
646 }
647 return 0;
648}
649
650static int
651element_gc_clear(ElementObject *self)
652{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300653 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700654 _clear_joined_ptr(&self->text);
655 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656
657 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300658 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300660 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300661 return 0;
662}
663
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000664static void
665element_dealloc(ElementObject* self)
666{
INADA Naokia6296d32017-08-24 14:55:17 +0900667 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300668 PyObject_GC_UnTrack(self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200669 Py_TRASHCAN_BEGIN(self, element_dealloc)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670
671 if (self->weakreflist != NULL)
672 PyObject_ClearWeakRefs((PyObject *) self);
673
Eli Bendersky0192ba32012-03-30 16:38:33 +0300674 /* element_gc_clear clears all references and deallocates extra
675 */
676 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677
678 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200679 Py_TYPE(self)->tp_free((PyObject *)self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200680 Py_TRASHCAN_END
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681}
682
683/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685/*[clinic input]
686_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688 subelement: object(subclass_of='&Element_Type')
689 /
690
691[clinic start generated code]*/
692
693static PyObject *
694_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
695/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
696{
697 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698 return NULL;
699
700 Py_RETURN_NONE;
701}
702
Serhiy Storchakacb985562015-05-04 15:32:48 +0300703/*[clinic input]
704_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706[clinic start generated code]*/
707
708static PyObject *
709_elementtree_Element_clear_impl(ElementObject *self)
710/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
711{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300712 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300715 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_RETURN_NONE;
721}
722
Serhiy Storchakacb985562015-05-04 15:32:48 +0300723/*[clinic input]
724_elementtree.Element.__copy__
725
726[clinic start generated code]*/
727
728static PyObject *
729_elementtree_Element___copy___impl(ElementObject *self)
730/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200732 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000733 ElementObject* element;
734
Eli Bendersky092af1f2012-03-04 07:14:03 +0200735 element = (ElementObject*) create_new_element(
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200736 self->tag, self->extra ? self->extra->attrib : NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 if (!element)
738 return NULL;
739
Oren Milman39ecb9c2017-10-10 23:26:24 +0300740 Py_INCREF(JOIN_OBJ(self->text));
741 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->tail));
744 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300746 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000748 if (element_resize(element, self->extra->length) < 0) {
749 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752
753 for (i = 0; i < self->extra->length; i++) {
754 Py_INCREF(self->extra->children[i]);
755 element->extra->children[i] = self->extra->children[i];
756 }
757
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300758 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000760 }
761
762 return (PyObject*) element;
763}
764
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200765/* Helper for a deep copy. */
766LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
767
Serhiy Storchakacb985562015-05-04 15:32:48 +0300768/*[clinic input]
769_elementtree.Element.__deepcopy__
770
Oren Milmand0568182017-09-12 17:39:15 +0300771 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300772 /
773
774[clinic start generated code]*/
775
776static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300777_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
778/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000779{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200780 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000781 ElementObject* element;
782 PyObject* tag;
783 PyObject* attrib;
784 PyObject* text;
785 PyObject* tail;
786 PyObject* id;
787
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000788 tag = deepcopy(self->tag, memo);
789 if (!tag)
790 return NULL;
791
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200792 if (self->extra && self->extra->attrib) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000793 attrib = deepcopy(self->extra->attrib, memo);
794 if (!attrib) {
795 Py_DECREF(tag);
796 return NULL;
797 }
798 } else {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200799 attrib = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 }
801
Eli Bendersky092af1f2012-03-04 07:14:03 +0200802 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 Py_DECREF(tag);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200805 Py_XDECREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806
807 if (!element)
808 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810 text = deepcopy(JOIN_OBJ(self->text), memo);
811 if (!text)
812 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300813 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814
815 tail = deepcopy(JOIN_OBJ(self->tail), memo);
816 if (!tail)
817 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300818 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300820 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000822 if (element_resize(element, self->extra->length) < 0)
823 goto error;
824
825 for (i = 0; i < self->extra->length; i++) {
826 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300827 if (!child || !Element_Check(child)) {
828 if (child) {
829 raise_type_error(child);
830 Py_DECREF(child);
831 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000832 element->extra->length = i;
833 goto error;
834 }
835 element->extra->children[i] = child;
836 }
837
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300838 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000840 }
841
842 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700843 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000844 if (!id)
845 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846
847 i = PyDict_SetItem(memo, id, (PyObject*) element);
848
849 Py_DECREF(id);
850
851 if (i < 0)
852 goto error;
853
854 return (PyObject*) element;
855
856 error:
857 Py_DECREF(element);
858 return NULL;
859}
860
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200861LOCAL(PyObject *)
862deepcopy(PyObject *object, PyObject *memo)
863{
864 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200866 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200867
868 /* Fast paths */
869 if (object == Py_None || PyUnicode_CheckExact(object)) {
870 Py_INCREF(object);
871 return object;
872 }
873
874 if (Py_REFCNT(object) == 1) {
875 if (PyDict_CheckExact(object)) {
876 PyObject *key, *value;
877 Py_ssize_t pos = 0;
878 int simple = 1;
879 while (PyDict_Next(object, &pos, &key, &value)) {
880 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
881 simple = 0;
882 break;
883 }
884 }
885 if (simple)
886 return PyDict_Copy(object);
887 /* Fall through to general case */
888 }
889 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300890 return _elementtree_Element___deepcopy___impl(
891 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200892 }
893 }
894
895 /* General case */
896 st = ET_STATE_GLOBAL;
897 if (!st->deepcopy_obj) {
898 PyErr_SetString(PyExc_RuntimeError,
899 "deepcopy helper not found");
900 return NULL;
901 }
902
Victor Stinner7fbac452016-08-20 01:34:44 +0200903 stack[0] = object;
904 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200905 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200906}
907
908
Serhiy Storchakacb985562015-05-04 15:32:48 +0300909/*[clinic input]
910_elementtree.Element.__sizeof__ -> Py_ssize_t
911
912[clinic start generated code]*/
913
914static Py_ssize_t
915_elementtree_Element___sizeof___impl(ElementObject *self)
916/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200917{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200918 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200919 if (self->extra) {
920 result += sizeof(ElementObjectExtra);
921 if (self->extra->children != self->extra->_children)
922 result += sizeof(PyObject*) * self->extra->allocated;
923 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300924 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200925}
926
Eli Bendersky698bdb22013-01-10 06:01:06 -0800927/* dict keys for getstate/setstate. */
928#define PICKLED_TAG "tag"
929#define PICKLED_CHILDREN "_children"
930#define PICKLED_ATTRIB "attrib"
931#define PICKLED_TAIL "tail"
932#define PICKLED_TEXT "text"
933
934/* __getstate__ returns a fabricated instance dict as in the pure-Python
935 * Element implementation, for interoperability/interchangeability. This
936 * makes the pure-Python implementation details an API, but (a) there aren't
937 * any unnecessary structures there; and (b) it buys compatibility with 3.2
938 * pickles. See issue #16076.
939 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300940/*[clinic input]
941_elementtree.Element.__getstate__
942
943[clinic start generated code]*/
944
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300946_elementtree_Element___getstate___impl(ElementObject *self)
947/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800948{
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200949 Py_ssize_t i;
950 PyObject *children, *attrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800951
952 /* Build a list of children. */
953 children = PyList_New(self->extra ? self->extra->length : 0);
954 if (!children)
955 return NULL;
956 for (i = 0; i < PyList_GET_SIZE(children); i++) {
957 PyObject *child = self->extra->children[i];
958 Py_INCREF(child);
959 PyList_SET_ITEM(children, i, child);
960 }
961
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200962 if (self->extra && self->extra->attrib) {
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200963 attrib = self->extra->attrib;
964 Py_INCREF(attrib);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800965 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966 else {
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200967 attrib = PyDict_New();
968 if (!attrib) {
969 Py_DECREF(children);
970 return NULL;
971 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972 }
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200973
974 return Py_BuildValue("{sOsNsNsOsO}",
975 PICKLED_TAG, self->tag,
976 PICKLED_CHILDREN, children,
977 PICKLED_ATTRIB, attrib,
978 PICKLED_TEXT, JOIN_OBJ(self->text),
979 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800980}
981
982static PyObject *
983element_setstate_from_attributes(ElementObject *self,
984 PyObject *tag,
985 PyObject *attrib,
986 PyObject *text,
987 PyObject *tail,
988 PyObject *children)
989{
990 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300991 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800992
993 if (!tag) {
994 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
995 return NULL;
996 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800997
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200998 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300999 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001000
Oren Milman39ecb9c2017-10-10 23:26:24 +03001001 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1002 Py_INCREF(JOIN_OBJ(text));
1003 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004
Oren Milman39ecb9c2017-10-10 23:26:24 +03001005 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1006 Py_INCREF(JOIN_OBJ(tail));
1007 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008
1009 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001010 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001011 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001012 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
1014 /* Compute 'nchildren'. */
1015 if (children) {
1016 if (!PyList_Check(children)) {
1017 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1018 return NULL;
1019 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001020 nchildren = PyList_GET_SIZE(children);
1021
1022 /* (Re-)allocate 'extra'.
1023 Avoid DECREFs calling into this code again (cycles, etc.)
1024 */
1025 oldextra = self->extra;
1026 self->extra = NULL;
1027 if (element_resize(self, nchildren)) {
1028 assert(!self->extra || !self->extra->length);
1029 clear_extra(self);
1030 self->extra = oldextra;
1031 return NULL;
1032 }
1033 assert(self->extra);
1034 assert(self->extra->allocated >= nchildren);
1035 if (oldextra) {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001036 assert(self->extra->attrib == NULL);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001037 self->extra->attrib = oldextra->attrib;
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001038 oldextra->attrib = NULL;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001039 }
1040
1041 /* Copy children */
1042 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001043 PyObject *child = PyList_GET_ITEM(children, i);
1044 if (!Element_Check(child)) {
1045 raise_type_error(child);
1046 self->extra->length = i;
1047 dealloc_extra(oldextra);
1048 return NULL;
1049 }
1050 Py_INCREF(child);
1051 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001052 }
1053
1054 assert(!self->extra->length);
1055 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001056 }
1057 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001058 if (element_resize(self, 0)) {
1059 return NULL;
1060 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001061 }
1062
Eli Bendersky698bdb22013-01-10 06:01:06 -08001063 /* Stash attrib. */
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001064 Py_XINCREF(attrib);
1065 Py_XSETREF(self->extra->attrib, attrib);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001066 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001067
1068 Py_RETURN_NONE;
1069}
1070
1071/* __setstate__ for Element instance from the Python implementation.
1072 * 'state' should be the instance dict.
1073 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001074
Eli Bendersky698bdb22013-01-10 06:01:06 -08001075static PyObject *
1076element_setstate_from_Python(ElementObject *self, PyObject *state)
1077{
1078 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1079 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1080 PyObject *args;
1081 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001082 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001083
Eli Bendersky698bdb22013-01-10 06:01:06 -08001084 tag = attrib = text = tail = children = NULL;
1085 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001086 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001087 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001088
1089 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1090 &attrib, &text, &tail, &children))
1091 retval = element_setstate_from_attributes(self, tag, attrib, text,
1092 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001093 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001094 retval = NULL;
1095
1096 Py_DECREF(args);
1097 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098}
1099
Serhiy Storchakacb985562015-05-04 15:32:48 +03001100/*[clinic input]
1101_elementtree.Element.__setstate__
1102
1103 state: object
1104 /
1105
1106[clinic start generated code]*/
1107
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001109_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1110/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001111{
1112 if (!PyDict_CheckExact(state)) {
1113 PyErr_Format(PyExc_TypeError,
1114 "Don't know how to unpickle \"%.200R\" as an Element",
1115 state);
1116 return NULL;
1117 }
1118 else
1119 return element_setstate_from_Python(self, state);
1120}
1121
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122LOCAL(int)
1123checkpath(PyObject* tag)
1124{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001125 Py_ssize_t i;
1126 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001127
1128 /* check if a tag contains an xpath character */
1129
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130#define PATHCHAR(ch) \
1131 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001135 const void *data = PyUnicode_DATA(tag);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001136 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001137 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1138 PyUnicode_READ(kind, data, 1) == '}' || (
1139 PyUnicode_READ(kind, data, 1) == '*' &&
1140 PyUnicode_READ(kind, data, 2) == '}'))) {
1141 /* wildcard: '{}tag' or '{*}tag' */
1142 return 1;
1143 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 for (i = 0; i < len; i++) {
1145 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1146 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001148 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001151 return 1;
1152 }
1153 return 0;
1154 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001155 if (PyBytes_Check(tag)) {
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001156 const char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001157 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1158 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001159 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001160 /* wildcard: '{}tag' or '{*}tag' */
1161 return 1;
1162 }
1163 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164 if (p[i] == '{')
1165 check = 0;
1166 else if (p[i] == '}')
1167 check = 1;
1168 else if (check && PATHCHAR(p[i]))
1169 return 1;
1170 }
1171 return 0;
1172 }
1173
1174 return 1; /* unknown type; might be path expression */
1175}
1176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177/*[clinic input]
1178_elementtree.Element.extend
1179
1180 elements: object
1181 /
1182
1183[clinic start generated code]*/
1184
1185static PyObject *
1186_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1187/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188{
1189 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001191
Serhiy Storchakacb985562015-05-04 15:32:48 +03001192 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193 if (!seq) {
1194 PyErr_Format(
1195 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197 );
1198 return NULL;
1199 }
1200
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001201 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001202 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001203 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 if (element_add_subelement(self, element) < 0) {
1205 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001206 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001207 return NULL;
1208 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001209 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001210 }
1211
1212 Py_DECREF(seq);
1213
1214 Py_RETURN_NONE;
1215}
1216
Serhiy Storchakacb985562015-05-04 15:32:48 +03001217/*[clinic input]
1218_elementtree.Element.find
1219
1220 path: object
1221 namespaces: object = None
1222
1223[clinic start generated code]*/
1224
1225static PyObject *
1226_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1227 PyObject *namespaces)
1228/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001230 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001231 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001232
Serhiy Storchakacb985562015-05-04 15:32:48 +03001233 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001234 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001235 return _PyObject_CallMethodIdObjArgs(
1236 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001238 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239
1240 if (!self->extra)
1241 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001242
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001243 for (i = 0; i < self->extra->length; i++) {
1244 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001246 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001248 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 Py_DECREF(item);
1252 if (rc < 0)
1253 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 }
1255
1256 Py_RETURN_NONE;
1257}
1258
Serhiy Storchakacb985562015-05-04 15:32:48 +03001259/*[clinic input]
1260_elementtree.Element.findtext
1261
1262 path: object
1263 default: object = None
1264 namespaces: object = None
1265
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1270 PyObject *default_value,
1271 PyObject *namespaces)
1272/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001274 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001275 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001276 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277
Serhiy Storchakacb985562015-05-04 15:32:48 +03001278 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001279 return _PyObject_CallMethodIdObjArgs(
1280 st->elementpath_obj, &PyId_findtext,
1281 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 );
1283
1284 if (!self->extra) {
1285 Py_INCREF(default_value);
1286 return default_value;
1287 }
1288
1289 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001290 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001291 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001292 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001293 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001294 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001296 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 if (text == Py_None) {
1298 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001299 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001300 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001301 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 return text;
1304 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001305 Py_DECREF(item);
1306 if (rc < 0)
1307 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001308 }
1309
1310 Py_INCREF(default_value);
1311 return default_value;
1312}
1313
Serhiy Storchakacb985562015-05-04 15:32:48 +03001314/*[clinic input]
1315_elementtree.Element.findall
1316
1317 path: object
1318 namespaces: object = None
1319
1320[clinic start generated code]*/
1321
1322static PyObject *
1323_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1324 PyObject *namespaces)
1325/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001326{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001327 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001328 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001329 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001330
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001331 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001332 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001333 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001334 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001335 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001336 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001337
1338 out = PyList_New(0);
1339 if (!out)
1340 return NULL;
1341
1342 if (!self->extra)
1343 return out;
1344
1345 for (i = 0; i < self->extra->length; i++) {
1346 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001347 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001348 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001349 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001350 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001351 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1352 Py_DECREF(item);
1353 Py_DECREF(out);
1354 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001355 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001356 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001357 }
1358
1359 return out;
1360}
1361
Serhiy Storchakacb985562015-05-04 15:32:48 +03001362/*[clinic input]
1363_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001364
Serhiy Storchakacb985562015-05-04 15:32:48 +03001365 path: object
1366 namespaces: object = None
1367
1368[clinic start generated code]*/
1369
1370static PyObject *
1371_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1372 PyObject *namespaces)
1373/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1374{
1375 PyObject* tag = path;
1376 _Py_IDENTIFIER(iterfind);
1377 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001378
Victor Stinnerf5616342016-12-09 15:26:00 +01001379 return _PyObject_CallMethodIdObjArgs(
1380 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001381}
1382
Serhiy Storchakacb985562015-05-04 15:32:48 +03001383/*[clinic input]
1384_elementtree.Element.get
1385
1386 key: object
1387 default: object = None
1388
1389[clinic start generated code]*/
1390
1391static PyObject *
1392_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1393 PyObject *default_value)
1394/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001395{
1396 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001397
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001398 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399 value = default_value;
1400 else {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001401 value = PyDict_GetItemWithError(self->extra->attrib, key);
1402 if (!value) {
1403 if (PyErr_Occurred()) {
1404 return NULL;
1405 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001406 value = default_value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001407 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408 }
1409
1410 Py_INCREF(value);
1411 return value;
1412}
1413
Eli Bendersky64d11e62012-06-15 07:42:50 +03001414static PyObject *
1415create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1416
1417
Serhiy Storchakacb985562015-05-04 15:32:48 +03001418/*[clinic input]
1419_elementtree.Element.iter
1420
1421 tag: object = None
1422
1423[clinic start generated code]*/
1424
Eli Bendersky64d11e62012-06-15 07:42:50 +03001425static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001426_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1427/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001428{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001429 if (PyUnicode_Check(tag)) {
1430 if (PyUnicode_READY(tag) < 0)
1431 return NULL;
1432 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1433 tag = Py_None;
1434 }
1435 else if (PyBytes_Check(tag)) {
1436 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1437 tag = Py_None;
1438 }
1439
Eli Bendersky64d11e62012-06-15 07:42:50 +03001440 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001441}
1442
1443
Serhiy Storchakacb985562015-05-04 15:32:48 +03001444/*[clinic input]
1445_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001446
Serhiy Storchakacb985562015-05-04 15:32:48 +03001447[clinic start generated code]*/
1448
1449static PyObject *
1450_elementtree_Element_itertext_impl(ElementObject *self)
1451/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1452{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001453 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001454}
1455
Eli Bendersky64d11e62012-06-15 07:42:50 +03001456
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001457static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001458element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001460 ElementObject* self = (ElementObject*) self_;
1461
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462 if (!self->extra || index < 0 || index >= self->extra->length) {
1463 PyErr_SetString(
1464 PyExc_IndexError,
1465 "child index out of range"
1466 );
1467 return NULL;
1468 }
1469
1470 Py_INCREF(self->extra->children[index]);
1471 return self->extra->children[index];
1472}
1473
Serhiy Storchakacb985562015-05-04 15:32:48 +03001474/*[clinic input]
1475_elementtree.Element.insert
1476
1477 index: Py_ssize_t
1478 subelement: object(subclass_of='&Element_Type')
1479 /
1480
1481[clinic start generated code]*/
1482
1483static PyObject *
1484_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1485 PyObject *subelement)
1486/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001487{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001488 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489
Victor Stinner5f0af232013-07-11 23:01:36 +02001490 if (!self->extra) {
1491 if (create_extra(self, NULL) < 0)
1492 return NULL;
1493 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001495 if (index < 0) {
1496 index += self->extra->length;
1497 if (index < 0)
1498 index = 0;
1499 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001500 if (index > self->extra->length)
1501 index = self->extra->length;
1502
1503 if (element_resize(self, 1) < 0)
1504 return NULL;
1505
1506 for (i = self->extra->length; i > index; i--)
1507 self->extra->children[i] = self->extra->children[i-1];
1508
Serhiy Storchakacb985562015-05-04 15:32:48 +03001509 Py_INCREF(subelement);
1510 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001511
1512 self->extra->length++;
1513
1514 Py_RETURN_NONE;
1515}
1516
Serhiy Storchakacb985562015-05-04 15:32:48 +03001517/*[clinic input]
1518_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519
Serhiy Storchakacb985562015-05-04 15:32:48 +03001520[clinic start generated code]*/
1521
1522static PyObject *
1523_elementtree_Element_items_impl(ElementObject *self)
1524/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1525{
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001526 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527 return PyList_New(0);
1528
1529 return PyDict_Items(self->extra->attrib);
1530}
1531
Serhiy Storchakacb985562015-05-04 15:32:48 +03001532/*[clinic input]
1533_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534
Serhiy Storchakacb985562015-05-04 15:32:48 +03001535[clinic start generated code]*/
1536
1537static PyObject *
1538_elementtree_Element_keys_impl(ElementObject *self)
1539/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1540{
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001541 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542 return PyList_New(0);
1543
1544 return PyDict_Keys(self->extra->attrib);
1545}
1546
Martin v. Löwis18e16552006-02-15 17:27:45 +00001547static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548element_length(ElementObject* self)
1549{
1550 if (!self->extra)
1551 return 0;
1552
1553 return self->extra->length;
1554}
1555
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556/*[clinic input]
1557_elementtree.Element.makeelement
1558
1559 tag: object
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001560 attrib: object(subclass_of='&PyDict_Type')
Serhiy Storchakacb985562015-05-04 15:32:48 +03001561 /
1562
1563[clinic start generated code]*/
1564
1565static PyObject *
1566_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1567 PyObject *attrib)
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001568/*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569{
1570 PyObject* elem;
1571
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 attrib = PyDict_Copy(attrib);
1573 if (!attrib)
1574 return NULL;
1575
Eli Bendersky092af1f2012-03-04 07:14:03 +02001576 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577
1578 Py_DECREF(attrib);
1579
1580 return elem;
1581}
1582
Serhiy Storchakacb985562015-05-04 15:32:48 +03001583/*[clinic input]
1584_elementtree.Element.remove
1585
1586 subelement: object(subclass_of='&Element_Type')
1587 /
1588
1589[clinic start generated code]*/
1590
1591static PyObject *
1592_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1593/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001595 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001596 int rc;
1597 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599 if (!self->extra) {
1600 /* element has no children, so raise exception */
1601 PyErr_SetString(
1602 PyExc_ValueError,
1603 "list.remove(x): x not in list"
1604 );
1605 return NULL;
1606 }
1607
1608 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001609 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001611 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001612 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001614 if (rc < 0)
1615 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 }
1617
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001618 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001619 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620 PyErr_SetString(
1621 PyExc_ValueError,
1622 "list.remove(x): x not in list"
1623 );
1624 return NULL;
1625 }
1626
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001627 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628
1629 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630 for (; i < self->extra->length; i++)
1631 self->extra->children[i] = self->extra->children[i+1];
1632
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001633 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634 Py_RETURN_NONE;
1635}
1636
1637static PyObject*
1638element_repr(ElementObject* self)
1639{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001640 int status;
1641
1642 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001643 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001644
1645 status = Py_ReprEnter((PyObject *)self);
1646 if (status == 0) {
1647 PyObject *res;
1648 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1649 Py_ReprLeave((PyObject *)self);
1650 return res;
1651 }
1652 if (status > 0)
1653 PyErr_Format(PyExc_RuntimeError,
1654 "reentrant call inside %s.__repr__",
1655 Py_TYPE(self)->tp_name);
1656 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657}
1658
Serhiy Storchakacb985562015-05-04 15:32:48 +03001659/*[clinic input]
1660_elementtree.Element.set
1661
1662 key: object
1663 value: object
1664 /
1665
1666[clinic start generated code]*/
1667
1668static PyObject *
1669_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1670 PyObject *value)
1671/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672{
1673 PyObject* attrib;
1674
Victor Stinner5f0af232013-07-11 23:01:36 +02001675 if (!self->extra) {
1676 if (create_extra(self, NULL) < 0)
1677 return NULL;
1678 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001679
1680 attrib = element_get_attrib(self);
1681 if (!attrib)
1682 return NULL;
1683
1684 if (PyDict_SetItem(attrib, key, value) < 0)
1685 return NULL;
1686
1687 Py_RETURN_NONE;
1688}
1689
1690static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001691element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001693 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001694 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695 PyObject* old;
1696
1697 if (!self->extra || index < 0 || index >= self->extra->length) {
1698 PyErr_SetString(
1699 PyExc_IndexError,
1700 "child assignment index out of range");
1701 return -1;
1702 }
1703
1704 old = self->extra->children[index];
1705
1706 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001707 if (!Element_Check(item)) {
1708 raise_type_error(item);
1709 return -1;
1710 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001711 Py_INCREF(item);
1712 self->extra->children[index] = item;
1713 } else {
1714 self->extra->length--;
1715 for (i = index; i < self->extra->length; i++)
1716 self->extra->children[i] = self->extra->children[i+1];
1717 }
1718
1719 Py_DECREF(old);
1720
1721 return 0;
1722}
1723
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001724static PyObject*
1725element_subscr(PyObject* self_, PyObject* item)
1726{
1727 ElementObject* self = (ElementObject*) self_;
1728
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001729 if (PyIndex_Check(item)) {
1730 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001731
1732 if (i == -1 && PyErr_Occurred()) {
1733 return NULL;
1734 }
1735 if (i < 0 && self->extra)
1736 i += self->extra->length;
1737 return element_getitem(self_, i);
1738 }
1739 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001740 Py_ssize_t start, stop, step, slicelen, i;
1741 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742 PyObject* list;
1743
1744 if (!self->extra)
1745 return PyList_New(0);
1746
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001747 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748 return NULL;
1749 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001750 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1751 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001752
1753 if (slicelen <= 0)
1754 return PyList_New(0);
1755 else {
1756 list = PyList_New(slicelen);
1757 if (!list)
1758 return NULL;
1759
1760 for (cur = start, i = 0; i < slicelen;
1761 cur += step, i++) {
1762 PyObject* item = self->extra->children[cur];
1763 Py_INCREF(item);
1764 PyList_SET_ITEM(list, i, item);
1765 }
1766
1767 return list;
1768 }
1769 }
1770 else {
1771 PyErr_SetString(PyExc_TypeError,
1772 "element indices must be integers");
1773 return NULL;
1774 }
1775}
1776
1777static int
1778element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1779{
1780 ElementObject* self = (ElementObject*) self_;
1781
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782 if (PyIndex_Check(item)) {
1783 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001784
1785 if (i == -1 && PyErr_Occurred()) {
1786 return -1;
1787 }
1788 if (i < 0 && self->extra)
1789 i += self->extra->length;
1790 return element_setitem(self_, i, value);
1791 }
1792 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001793 Py_ssize_t start, stop, step, slicelen, newlen, i;
1794 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795
1796 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001797 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001798
Victor Stinner5f0af232013-07-11 23:01:36 +02001799 if (!self->extra) {
1800 if (create_extra(self, NULL) < 0)
1801 return -1;
1802 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001804 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001805 return -1;
1806 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001807 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1808 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001809
Eli Bendersky865756a2012-03-09 13:38:15 +02001810 if (value == NULL) {
1811 /* Delete slice */
1812 size_t cur;
1813 Py_ssize_t i;
1814
1815 if (slicelen <= 0)
1816 return 0;
1817
1818 /* Since we're deleting, the direction of the range doesn't matter,
1819 * so for simplicity make it always ascending.
1820 */
1821 if (step < 0) {
1822 stop = start + 1;
1823 start = stop + step * (slicelen - 1) - 1;
1824 step = -step;
1825 }
1826
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001827 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001828
1829 /* recycle is a list that will contain all the children
1830 * scheduled for removal.
1831 */
1832 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001833 return -1;
1834 }
1835
1836 /* This loop walks over all the children that have to be deleted,
1837 * with cur pointing at them. num_moved is the amount of children
1838 * until the next deleted child that have to be "shifted down" to
1839 * occupy the deleted's places.
1840 * Note that in the ith iteration, shifting is done i+i places down
1841 * because i children were already removed.
1842 */
1843 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1844 /* Compute how many children have to be moved, clipping at the
1845 * list end.
1846 */
1847 Py_ssize_t num_moved = step - 1;
1848 if (cur + step >= (size_t)self->extra->length) {
1849 num_moved = self->extra->length - cur - 1;
1850 }
1851
1852 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1853
1854 memmove(
1855 self->extra->children + cur - i,
1856 self->extra->children + cur + 1,
1857 num_moved * sizeof(PyObject *));
1858 }
1859
1860 /* Leftover "tail" after the last removed child */
1861 cur = start + (size_t)slicelen * step;
1862 if (cur < (size_t)self->extra->length) {
1863 memmove(
1864 self->extra->children + cur - slicelen,
1865 self->extra->children + cur,
1866 (self->extra->length - cur) * sizeof(PyObject *));
1867 }
1868
1869 self->extra->length -= slicelen;
1870
1871 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001872 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001873 return 0;
1874 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001875
1876 /* A new slice is actually being assigned */
1877 seq = PySequence_Fast(value, "");
1878 if (!seq) {
1879 PyErr_Format(
1880 PyExc_TypeError,
1881 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1882 );
1883 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001885 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001886
1887 if (step != 1 && newlen != slicelen)
1888 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001889 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001890 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001891 "attempt to assign sequence of size %zd "
1892 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001893 newlen, slicelen
1894 );
1895 return -1;
1896 }
1897
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001898 /* Resize before creating the recycle bin, to prevent refleaks. */
1899 if (newlen > slicelen) {
1900 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001901 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001902 return -1;
1903 }
1904 }
1905
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001906 for (i = 0; i < newlen; i++) {
1907 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1908 if (!Element_Check(element)) {
1909 raise_type_error(element);
1910 Py_DECREF(seq);
1911 return -1;
1912 }
1913 }
1914
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001915 if (slicelen > 0) {
1916 /* to avoid recursive calls to this method (via decref), move
1917 old items to the recycle bin here, and get rid of them when
1918 we're done modifying the element */
1919 recycle = PyList_New(slicelen);
1920 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001921 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001922 return -1;
1923 }
1924 for (cur = start, i = 0; i < slicelen;
1925 cur += step, i++)
1926 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1927 }
1928
1929 if (newlen < slicelen) {
1930 /* delete slice */
1931 for (i = stop; i < self->extra->length; i++)
1932 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1933 } else if (newlen > slicelen) {
1934 /* insert slice */
1935 for (i = self->extra->length-1; i >= stop; i--)
1936 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1937 }
1938
1939 /* replace the slice */
1940 for (cur = start, i = 0; i < newlen;
1941 cur += step, i++) {
1942 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1943 Py_INCREF(element);
1944 self->extra->children[cur] = element;
1945 }
1946
1947 self->extra->length += newlen - slicelen;
1948
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001949 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001950
1951 /* discard the recycle bin, and everything in it */
1952 Py_XDECREF(recycle);
1953
1954 return 0;
1955 }
1956 else {
1957 PyErr_SetString(PyExc_TypeError,
1958 "element indices must be integers");
1959 return -1;
1960 }
1961}
1962
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001963static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001964element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001965{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001966 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001967 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001968 return res;
1969}
1970
Serhiy Storchakadde08152015-11-25 15:28:13 +02001971static PyObject*
1972element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001973{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001974 PyObject *res = element_get_text(self);
1975 Py_XINCREF(res);
1976 return res;
1977}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001978
Serhiy Storchakadde08152015-11-25 15:28:13 +02001979static PyObject*
1980element_tail_getter(ElementObject *self, void *closure)
1981{
1982 PyObject *res = element_get_tail(self);
1983 Py_XINCREF(res);
1984 return res;
1985}
1986
1987static PyObject*
1988element_attrib_getter(ElementObject *self, void *closure)
1989{
1990 PyObject *res;
1991 if (!self->extra) {
1992 if (create_extra(self, NULL) < 0)
1993 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001994 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001995 res = element_get_attrib(self);
1996 Py_XINCREF(res);
1997 return res;
1998}
Victor Stinner4d463432013-07-11 23:05:03 +02001999
Serhiy Storchakadde08152015-11-25 15:28:13 +02002000/* macro for setter validation */
/* Rejects attribute deletion: when V is NULL, sets AttributeError and makes
   the ENCLOSING function return -1.  Wrapped in do { } while (0) so the
   macro behaves as a single statement (safe inside if/else without
   braces); invoke it with a trailing semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2008
Serhiy Storchakadde08152015-11-25 15:28:13 +02002009static int
2010element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2011{
2012 _VALIDATE_ATTR_VALUE(value);
2013 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002014 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002015 return 0;
2016}
2017
2018static int
2019element_text_setter(ElementObject *self, PyObject *value, void *closure)
2020{
2021 _VALIDATE_ATTR_VALUE(value);
2022 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002023 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002024 return 0;
2025}
2026
2027static int
2028element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2029{
2030 _VALIDATE_ATTR_VALUE(value);
2031 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002032 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002033 return 0;
2034}
2035
2036static int
2037element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2038{
2039 _VALIDATE_ATTR_VALUE(value);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002040 if (!PyDict_Check(value)) {
2041 PyErr_Format(PyExc_TypeError,
2042 "attrib must be dict, not %.200s",
2043 value->ob_type->tp_name);
2044 return -1;
2045 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002046 if (!self->extra) {
2047 if (create_extra(self, NULL) < 0)
2048 return -1;
2049 }
2050 Py_INCREF(value);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002051 Py_XSETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002052 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002053}
2054
2055static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002056 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002057 0, /* sq_concat */
2058 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002059 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002060 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002061 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002062 0,
2063};
2064
Eli Bendersky64d11e62012-06-15 07:42:50 +03002065/******************************* Element iterator ****************************/
2066
2067/* ElementIterObject represents the iteration state over an XML element in
2068 * pre-order traversal. To keep track of which sub-element should be returned
2069 * next, a stack of parents is maintained. This is a standard stack-based
2070 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2072 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002073 * the current one is exhausted, and the next child to examine in that parent.
2074 */
2075typedef struct ParentLocator_t {
2076 ElementObject *parent;
2077 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002078} ParentLocator;
2079
2080typedef struct {
2081 PyObject_HEAD
2082 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002083 Py_ssize_t parent_stack_used;
2084 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002085 ElementObject *root_element;
2086 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002087 int gettext;
2088} ElementIterObject;
2089
2090
2091static void
2092elementiter_dealloc(ElementIterObject *it)
2093{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002094 Py_ssize_t i = it->parent_stack_used;
2095 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002096 /* bpo-31095: UnTrack is needed before calling any callbacks */
2097 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002098 while (i--)
2099 Py_XDECREF(it->parent_stack[i].parent);
2100 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101
2102 Py_XDECREF(it->sought_tag);
2103 Py_XDECREF(it->root_element);
2104
Eli Bendersky64d11e62012-06-15 07:42:50 +03002105 PyObject_GC_Del(it);
2106}
2107
2108static int
2109elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2110{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002111 Py_ssize_t i = it->parent_stack_used;
2112 while (i--)
2113 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114
2115 Py_VISIT(it->root_element);
2116 Py_VISIT(it->sought_tag);
2117 return 0;
2118}
2119
2120/* Helper function for elementiter_next. Add a new parent to the parent stack.
2121 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002122static int
2123parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002124{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002125 ParentLocator *item;
2126
2127 if (it->parent_stack_used >= it->parent_stack_size) {
2128 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2129 ParentLocator *parent_stack = it->parent_stack;
2130 PyMem_Resize(parent_stack, ParentLocator, new_size);
2131 if (parent_stack == NULL)
2132 return -1;
2133 it->parent_stack = parent_stack;
2134 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002135 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002136 item = it->parent_stack + it->parent_stack_used++;
2137 Py_INCREF(parent);
2138 item->parent = parent;
2139 item->child_index = 0;
2140 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002141}
2142
2143static PyObject *
2144elementiter_next(ElementIterObject *it)
2145{
2146 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002147 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002148 * A short note on gettext: this function serves both the iter() and
2149 * itertext() methods to avoid code duplication. However, there are a few
2150 * small differences in the way these iterations work. Namely:
2151 * - itertext() only yields text from nodes that have it, and continues
2152 * iterating when a node doesn't have text (so it doesn't return any
2153 * node like iter())
2154 * - itertext() also has to handle tail, after finishing with all the
2155 * children of a node.
2156 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002157 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002158 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002159 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160
2161 while (1) {
2162 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002163 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002164 * iterator is exhausted.
2165 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002166 if (!it->parent_stack_used) {
2167 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002168 PyErr_SetNone(PyExc_StopIteration);
2169 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002170 }
2171
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002172 elem = it->root_element; /* steals a reference */
2173 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002174 }
2175 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 /* See if there are children left to traverse in the current parent. If
2177 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002178 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002179 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2180 Py_ssize_t child_index = item->child_index;
2181 ElementObjectExtra *extra;
2182 elem = item->parent;
2183 extra = elem->extra;
2184 if (!extra || child_index >= extra->length) {
2185 it->parent_stack_used--;
2186 /* Note that extra condition on it->parent_stack_used here;
2187 * this is because itertext() is supposed to only return *inner*
2188 * text, not text following the element it began iteration with.
2189 */
2190 if (it->gettext && it->parent_stack_used) {
2191 text = element_get_tail(elem);
2192 goto gettext;
2193 }
2194 Py_DECREF(elem);
2195 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002196 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002197
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002198 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002199 elem = (ElementObject *)extra->children[child_index];
2200 item->child_index++;
2201 Py_INCREF(elem);
2202 }
2203
2204 if (parent_stack_push_new(it, elem) < 0) {
2205 Py_DECREF(elem);
2206 PyErr_NoMemory();
2207 return NULL;
2208 }
2209 if (it->gettext) {
2210 text = element_get_text(elem);
2211 goto gettext;
2212 }
2213
2214 if (it->sought_tag == Py_None)
2215 return (PyObject *)elem;
2216
2217 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2218 if (rc > 0)
2219 return (PyObject *)elem;
2220
2221 Py_DECREF(elem);
2222 if (rc < 0)
2223 return NULL;
2224 continue;
2225
2226gettext:
2227 if (!text) {
2228 Py_DECREF(elem);
2229 return NULL;
2230 }
2231 if (text == Py_None) {
2232 Py_DECREF(elem);
2233 }
2234 else {
2235 Py_INCREF(text);
2236 Py_DECREF(elem);
2237 rc = PyObject_IsTrue(text);
2238 if (rc > 0)
2239 return text;
2240 Py_DECREF(text);
2241 if (rc < 0)
2242 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002243 }
2244 }
2245
2246 return NULL;
2247}
2248
2249
2250static PyTypeObject ElementIter_Type = {
2251 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002252 /* Using the module's name since the pure-Python implementation does not
2253 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002254 "_elementtree._element_iterator", /* tp_name */
2255 sizeof(ElementIterObject), /* tp_basicsize */
2256 0, /* tp_itemsize */
2257 /* methods */
2258 (destructor)elementiter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002259 0, /* tp_vectorcall_offset */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002260 0, /* tp_getattr */
2261 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002262 0, /* tp_as_async */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002263 0, /* tp_repr */
2264 0, /* tp_as_number */
2265 0, /* tp_as_sequence */
2266 0, /* tp_as_mapping */
2267 0, /* tp_hash */
2268 0, /* tp_call */
2269 0, /* tp_str */
2270 0, /* tp_getattro */
2271 0, /* tp_setattro */
2272 0, /* tp_as_buffer */
2273 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2274 0, /* tp_doc */
2275 (traverseproc)elementiter_traverse, /* tp_traverse */
2276 0, /* tp_clear */
2277 0, /* tp_richcompare */
2278 0, /* tp_weaklistoffset */
2279 PyObject_SelfIter, /* tp_iter */
2280 (iternextfunc)elementiter_next, /* tp_iternext */
2281 0, /* tp_methods */
2282 0, /* tp_members */
2283 0, /* tp_getset */
2284 0, /* tp_base */
2285 0, /* tp_dict */
2286 0, /* tp_descr_get */
2287 0, /* tp_descr_set */
2288 0, /* tp_dictoffset */
2289 0, /* tp_init */
2290 0, /* tp_alloc */
2291 0, /* tp_new */
2292};
2293
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002294#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002295
2296static PyObject *
2297create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2298{
2299 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002300
2301 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2302 if (!it)
2303 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002304
Victor Stinner4d463432013-07-11 23:05:03 +02002305 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002306 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002307 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002308 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002309 it->root_element = self;
2310
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002311 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002312 if (it->parent_stack == NULL) {
2313 Py_DECREF(it);
2314 PyErr_NoMemory();
2315 return NULL;
2316 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002317 it->parent_stack_used = 0;
2318 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002319
Victor Stinner1b184552019-10-08 00:09:31 +02002320 PyObject_GC_Track(it);
2321
Eli Bendersky64d11e62012-06-15 07:42:50 +03002322 return (PyObject *)it;
2323}
2324
2325
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002326/* ==================================================================== */
2327/* the tree builder type */
2328
2329typedef struct {
2330 PyObject_HEAD
2331
Eli Bendersky58d548d2012-05-29 15:45:16 +03002332 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002333
Antoine Pitrouee329312012-10-04 19:53:29 +02002334 PyObject *this; /* current node */
2335 PyObject *last; /* most recently created node */
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002336 PyObject *last_for_tail; /* most recently created node that takes a tail */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337
Eli Bendersky58d548d2012-05-29 15:45:16 +03002338 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339
Eli Bendersky58d548d2012-05-29 15:45:16 +03002340 PyObject *stack; /* element stack */
2341 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002342
Eli Bendersky48d358b2012-05-30 17:57:50 +03002343 PyObject *element_factory;
Stefan Behnel43851a22019-05-01 21:20:38 +02002344 PyObject *comment_factory;
2345 PyObject *pi_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002346
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002348 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002349 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2350 PyObject *end_event_obj;
2351 PyObject *start_ns_event_obj;
2352 PyObject *end_ns_event_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +02002353 PyObject *comment_event_obj;
2354 PyObject *pi_event_obj;
2355
2356 char insert_comments;
2357 char insert_pis;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358} TreeBuilderObject;
2359
Andy Lesterdffe4c02020-03-04 07:15:20 -06002360#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002361
2362/* -------------------------------------------------------------------- */
2363/* constructor and destructor */
2364
Eli Bendersky58d548d2012-05-29 15:45:16 +03002365static PyObject *
2366treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002367{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002368 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2369 if (t != NULL) {
2370 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371
Eli Bendersky58d548d2012-05-29 15:45:16 +03002372 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002373 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002374 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002375 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002376
Eli Bendersky58d548d2012-05-29 15:45:16 +03002377 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002378 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002379 t->comment_factory = NULL;
2380 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002381 t->stack = PyList_New(20);
2382 if (!t->stack) {
2383 Py_DECREF(t->this);
2384 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002385 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002386 return NULL;
2387 }
2388 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002389
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002390 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002391 t->start_event_obj = t->end_event_obj = NULL;
2392 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002393 t->comment_event_obj = t->pi_event_obj = NULL;
2394 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002395 }
2396 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002397}
2398
Serhiy Storchakacb985562015-05-04 15:32:48 +03002399/*[clinic input]
2400_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002401
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002402 element_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002403 *
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002404 comment_factory: object = None
2405 pi_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002406 insert_comments: bool = False
2407 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002408
2409[clinic start generated code]*/
2410
2411static int
2412_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002413 PyObject *element_factory,
2414 PyObject *comment_factory,
2415 PyObject *pi_factory,
2416 int insert_comments, int insert_pis)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002417/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002418{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002419 if (element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002420 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002421 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002422 } else {
2423 Py_CLEAR(self->element_factory);
2424 }
2425
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002426 if (comment_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002427 elementtreestate *st = ET_STATE_GLOBAL;
2428 comment_factory = st->comment_factory;
2429 }
2430 if (comment_factory) {
2431 Py_INCREF(comment_factory);
2432 Py_XSETREF(self->comment_factory, comment_factory);
2433 self->insert_comments = insert_comments;
2434 } else {
2435 Py_CLEAR(self->comment_factory);
2436 self->insert_comments = 0;
2437 }
2438
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002439 if (pi_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002440 elementtreestate *st = ET_STATE_GLOBAL;
2441 pi_factory = st->pi_factory;
2442 }
2443 if (pi_factory) {
2444 Py_INCREF(pi_factory);
2445 Py_XSETREF(self->pi_factory, pi_factory);
2446 self->insert_pis = insert_pis;
2447 } else {
2448 Py_CLEAR(self->pi_factory);
2449 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002450 }
2451
Eli Bendersky58d548d2012-05-29 15:45:16 +03002452 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002453}
2454
Eli Bendersky48d358b2012-05-30 17:57:50 +03002455static int
2456treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2457{
Stefan Behnel43851a22019-05-01 21:20:38 +02002458 Py_VISIT(self->pi_event_obj);
2459 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002460 Py_VISIT(self->end_ns_event_obj);
2461 Py_VISIT(self->start_ns_event_obj);
2462 Py_VISIT(self->end_event_obj);
2463 Py_VISIT(self->start_event_obj);
2464 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002465 Py_VISIT(self->root);
2466 Py_VISIT(self->this);
2467 Py_VISIT(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002468 Py_VISIT(self->last_for_tail);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002469 Py_VISIT(self->data);
2470 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002471 Py_VISIT(self->pi_factory);
2472 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002473 Py_VISIT(self->element_factory);
2474 return 0;
2475}
2476
2477static int
2478treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479{
Stefan Behnel43851a22019-05-01 21:20:38 +02002480 Py_CLEAR(self->pi_event_obj);
2481 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002482 Py_CLEAR(self->end_ns_event_obj);
2483 Py_CLEAR(self->start_ns_event_obj);
2484 Py_CLEAR(self->end_event_obj);
2485 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002486 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002487 Py_CLEAR(self->stack);
2488 Py_CLEAR(self->data);
2489 Py_CLEAR(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002490 Py_CLEAR(self->last_for_tail);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002491 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002492 Py_CLEAR(self->pi_factory);
2493 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002494 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002495 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002496 return 0;
2497}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498
Eli Bendersky48d358b2012-05-30 17:57:50 +03002499static void
2500treebuilder_dealloc(TreeBuilderObject *self)
2501{
2502 PyObject_GC_UnTrack(self);
2503 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002504 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505}
2506
2507/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002508/* helpers for handling of arbitrary element-like objects */
2509
Stefan Behnel43851a22019-05-01 21:20:38 +02002510/*[clinic input]
2511_elementtree._set_factories
2512
2513 comment_factory: object
2514 pi_factory: object
2515 /
2516
2517Change the factories used to create comments and processing instructions.
2518
2519For internal use only.
2520[clinic start generated code]*/
2521
2522static PyObject *
2523_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2524 PyObject *pi_factory)
2525/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2526{
2527 elementtreestate *st = ET_STATE_GLOBAL;
2528 PyObject *old;
2529
2530 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2531 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2532 Py_TYPE(comment_factory)->tp_name);
2533 return NULL;
2534 }
2535 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2536 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2537 Py_TYPE(pi_factory)->tp_name);
2538 return NULL;
2539 }
2540
2541 old = PyTuple_Pack(2,
2542 st->comment_factory ? st->comment_factory : Py_None,
2543 st->pi_factory ? st->pi_factory : Py_None);
2544
2545 if (comment_factory == Py_None) {
2546 Py_CLEAR(st->comment_factory);
2547 } else {
2548 Py_INCREF(comment_factory);
2549 Py_XSETREF(st->comment_factory, comment_factory);
2550 }
2551 if (pi_factory == Py_None) {
2552 Py_CLEAR(st->pi_factory);
2553 } else {
2554 Py_INCREF(pi_factory);
2555 Py_XSETREF(st->pi_factory, pi_factory);
2556 }
2557
2558 return old;
2559}
2560
Antoine Pitrouee329312012-10-04 19:53:29 +02002561static int
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002562treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2563 PyObject **dest, _Py_Identifier *name)
Antoine Pitrouee329312012-10-04 19:53:29 +02002564{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002565 /* Fast paths for the "almost always" cases. */
Antoine Pitrouee329312012-10-04 19:53:29 +02002566 if (Element_CheckExact(element)) {
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002567 PyObject *dest_obj = JOIN_OBJ(*dest);
2568 if (dest_obj == Py_None) {
2569 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2570 *data = NULL;
2571 Py_DECREF(dest_obj);
2572 return 0;
2573 }
2574 else if (JOIN_GET(*dest)) {
2575 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2576 return -1;
2577 }
2578 Py_CLEAR(*data);
2579 return 0;
2580 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002581 }
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002582
2583 /* Fallback for the non-Element / non-trivial cases. */
2584 {
Antoine Pitrouee329312012-10-04 19:53:29 +02002585 int r;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002586 PyObject* joined;
2587 PyObject* previous = _PyObject_GetAttrId(element, name);
2588 if (!previous)
Antoine Pitrouee329312012-10-04 19:53:29 +02002589 return -1;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002590 joined = list_join(*data);
2591 if (!joined) {
2592 Py_DECREF(previous);
2593 return -1;
2594 }
2595 if (previous != Py_None) {
2596 PyObject *tmp = PyNumber_Add(previous, joined);
2597 Py_DECREF(joined);
2598 Py_DECREF(previous);
2599 if (!tmp)
2600 return -1;
2601 joined = tmp;
2602 } else {
2603 Py_DECREF(previous);
2604 }
2605
Antoine Pitrouee329312012-10-04 19:53:29 +02002606 r = _PyObject_SetAttrId(element, name, joined);
2607 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002608 if (r < 0)
2609 return -1;
2610 Py_CLEAR(*data);
2611 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002612 }
2613}
2614
Serhiy Storchaka576def02017-03-30 09:47:31 +03002615LOCAL(int)
2616treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002617{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002618 if (!self->data) {
2619 return 0;
2620 }
2621
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002622 if (!self->last_for_tail) {
2623 PyObject *element = self->last;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002624 _Py_IDENTIFIER(text);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002625 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002626 element, &self->data,
2627 &((ElementObject *) element)->text, &PyId_text);
2628 }
2629 else {
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002630 PyObject *element = self->last_for_tail;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002631 _Py_IDENTIFIER(tail);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002632 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002633 element, &self->data,
2634 &((ElementObject *) element)->tail, &PyId_tail);
2635 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002636}
2637
2638static int
2639treebuilder_add_subelement(PyObject *element, PyObject *child)
2640{
2641 _Py_IDENTIFIER(append);
2642 if (Element_CheckExact(element)) {
2643 ElementObject *elem = (ElementObject *) element;
2644 return element_add_subelement(elem, child);
2645 }
2646 else {
2647 PyObject *res;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002648 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
Antoine Pitrouee329312012-10-04 19:53:29 +02002649 if (res == NULL)
2650 return -1;
2651 Py_DECREF(res);
2652 return 0;
2653 }
2654}
2655
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002656LOCAL(int)
2657treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2658 PyObject *node)
2659{
2660 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002661 PyObject *res;
2662 PyObject *event = PyTuple_Pack(2, action, node);
2663 if (event == NULL)
2664 return -1;
Petr Viktorinffd97532020-02-11 17:46:57 +01002665 res = PyObject_CallOneArg(self->events_append, event);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002666 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002667 if (res == NULL)
2668 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002669 Py_DECREF(res);
2670 }
2671 return 0;
2672}
2673
Antoine Pitrouee329312012-10-04 19:53:29 +02002674/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675/* handlers */
2676
2677LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2679 PyObject* attrib)
2680{
2681 PyObject* node;
2682 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002683 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684
Serhiy Storchaka576def02017-03-30 09:47:31 +03002685 if (treebuilder_flush_data(self) < 0) {
2686 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 }
2688
Stefan Behnel43851a22019-05-01 21:20:38 +02002689 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002690 node = create_new_element(tag, attrib);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002691 } else if (attrib == NULL) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002692 attrib = PyDict_New();
2693 if (!attrib)
2694 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002695 node = PyObject_CallFunctionObjArgs(self->element_factory,
2696 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002697 Py_DECREF(attrib);
2698 }
2699 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002700 node = PyObject_CallFunctionObjArgs(self->element_factory,
2701 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002702 }
2703 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002705 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
Antoine Pitrouee329312012-10-04 19:53:29 +02002707 this = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002708 Py_CLEAR(self->last_for_tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709
2710 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002711 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002712 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713 } else {
2714 if (self->root) {
2715 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002716 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 "multiple elements on top level"
2718 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002719 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720 }
2721 Py_INCREF(node);
2722 self->root = node;
2723 }
2724
2725 if (self->index < PyList_GET_SIZE(self->stack)) {
2726 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002727 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728 Py_INCREF(this);
2729 } else {
2730 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002731 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732 }
2733 self->index++;
2734
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002736 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002738 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002740 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2741 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
2743 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002744
2745 error:
2746 Py_DECREF(node);
2747 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748}
2749
2750LOCAL(PyObject*)
2751treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2752{
2753 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002754 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002755 /* ignore calls to data before the first call to start */
2756 Py_RETURN_NONE;
2757 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758 /* store the first item as is */
2759 Py_INCREF(data); self->data = data;
2760 } else {
2761 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002762 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2763 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002764 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765 /* expat often generates single character data sections; handle
2766 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002767 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2768 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002770 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771 } else if (PyList_CheckExact(self->data)) {
2772 if (PyList_Append(self->data, data) < 0)
2773 return NULL;
2774 } else {
2775 PyObject* list = PyList_New(2);
2776 if (!list)
2777 return NULL;
2778 PyList_SET_ITEM(list, 0, self->data);
2779 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2780 self->data = list;
2781 }
2782 }
2783
2784 Py_RETURN_NONE;
2785}
2786
2787LOCAL(PyObject*)
2788treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2789{
2790 PyObject* item;
2791
Serhiy Storchaka576def02017-03-30 09:47:31 +03002792 if (treebuilder_flush_data(self) < 0) {
2793 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 }
2795
2796 if (self->index == 0) {
2797 PyErr_SetString(
2798 PyExc_IndexError,
2799 "pop from empty stack"
2800 );
2801 return NULL;
2802 }
2803
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002804 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002805 self->last = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002806 Py_INCREF(self->last);
2807 Py_XSETREF(self->last_for_tail, self->last);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002808 self->index--;
2809 self->this = PyList_GET_ITEM(self->stack, self->index);
2810 Py_INCREF(self->this);
2811 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002813 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2814 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815
2816 Py_INCREF(self->last);
2817 return (PyObject*) self->last;
2818}
2819
Stefan Behnel43851a22019-05-01 21:20:38 +02002820LOCAL(PyObject*)
2821treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2822{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002823 PyObject* comment;
Stefan Behnel43851a22019-05-01 21:20:38 +02002824 PyObject* this;
2825
2826 if (treebuilder_flush_data(self) < 0) {
2827 return NULL;
2828 }
2829
2830 if (self->comment_factory) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002831 comment = PyObject_CallOneArg(self->comment_factory, text);
Stefan Behnel43851a22019-05-01 21:20:38 +02002832 if (!comment)
2833 return NULL;
2834
2835 this = self->this;
2836 if (self->insert_comments && this != Py_None) {
2837 if (treebuilder_add_subelement(this, comment) < 0)
2838 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002839 Py_INCREF(comment);
2840 Py_XSETREF(self->last_for_tail, comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02002841 }
2842 } else {
2843 Py_INCREF(text);
2844 comment = text;
2845 }
2846
2847 if (self->events_append && self->comment_event_obj) {
2848 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2849 goto error;
2850 }
2851
2852 return comment;
2853
2854 error:
2855 Py_DECREF(comment);
2856 return NULL;
2857}
2858
2859LOCAL(PyObject*)
2860treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2861{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002862 PyObject* pi;
Stefan Behnel43851a22019-05-01 21:20:38 +02002863 PyObject* this;
2864 PyObject* stack[2] = {target, text};
2865
2866 if (treebuilder_flush_data(self) < 0) {
2867 return NULL;
2868 }
2869
2870 if (self->pi_factory) {
2871 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2872 if (!pi) {
2873 return NULL;
2874 }
2875
2876 this = self->this;
2877 if (self->insert_pis && this != Py_None) {
2878 if (treebuilder_add_subelement(this, pi) < 0)
2879 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002880 Py_INCREF(pi);
2881 Py_XSETREF(self->last_for_tail, pi);
Stefan Behnel43851a22019-05-01 21:20:38 +02002882 }
2883 } else {
2884 pi = PyTuple_Pack(2, target, text);
2885 if (!pi) {
2886 return NULL;
2887 }
2888 }
2889
2890 if (self->events_append && self->pi_event_obj) {
2891 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2892 goto error;
2893 }
2894
2895 return pi;
2896
2897 error:
2898 Py_DECREF(pi);
2899 return NULL;
2900}
2901
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002902LOCAL(PyObject*)
2903treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2904{
2905 PyObject* parcel;
2906
2907 if (self->events_append && self->start_ns_event_obj) {
2908 parcel = PyTuple_Pack(2, prefix, uri);
2909 if (!parcel) {
2910 return NULL;
2911 }
2912
2913 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2914 Py_DECREF(parcel);
2915 return NULL;
2916 }
2917 Py_DECREF(parcel);
2918 }
2919
2920 Py_RETURN_NONE;
2921}
2922
2923LOCAL(PyObject*)
2924treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2925{
2926 if (self->events_append && self->end_ns_event_obj) {
2927 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2928 return NULL;
2929 }
2930 }
2931
2932 Py_RETURN_NONE;
2933}
2934
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935/* -------------------------------------------------------------------- */
2936/* methods (in alphabetical order) */
2937
Serhiy Storchakacb985562015-05-04 15:32:48 +03002938/*[clinic input]
2939_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940
Serhiy Storchakacb985562015-05-04 15:32:48 +03002941 data: object
2942 /
2943
2944[clinic start generated code]*/
2945
2946static PyObject *
2947_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2948/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2949{
    /* Thin wrapper: forwards to treebuilder_handle_data() and returns
       its result unchanged. */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950 return treebuilder_handle_data(self, data);
2951}
2952
Serhiy Storchakacb985562015-05-04 15:32:48 +03002953/*[clinic input]
2954_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955
Serhiy Storchakacb985562015-05-04 15:32:48 +03002956 tag: object
2957 /
2958
2959[clinic start generated code]*/
2960
2961static PyObject *
2962_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2963/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2964{
    /* Thin wrapper: forwards to treebuilder_handle_end() and returns
       its result unchanged. */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 return treebuilder_handle_end(self, tag);
2966}
2967
Stefan Behnel43851a22019-05-01 21:20:38 +02002968/*[clinic input]
2969_elementtree.TreeBuilder.comment
2970
2971 text: object
2972 /
2973
2974[clinic start generated code]*/
2975
2976static PyObject *
2977_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2978/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2979{
    /* Thin wrapper: forwards to treebuilder_handle_comment() and returns
       its result unchanged. */
2980 return treebuilder_handle_comment(self, text);
2981}
2982
2983/*[clinic input]
2984_elementtree.TreeBuilder.pi
2985
2986 target: object
2987 text: object = None
2988 /
2989
2990[clinic start generated code]*/
2991
2992static PyObject *
2993_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
2994 PyObject *text)
2995/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
2996{
    /* Thin wrapper: forwards to treebuilder_handle_pi() and returns its
       result unchanged. */
2997 return treebuilder_handle_pi(self, target, text);
2998}
2999
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003000LOCAL(PyObject*)
3001treebuilder_done(TreeBuilderObject* self)
3002{
3003 PyObject* res;
3004
3005 /* FIXME: check stack size? */
3006
3007 if (self->root)
3008 res = self->root;
3009 else
3010 res = Py_None;
3011
3012 Py_INCREF(res);
3013 return res;
3014}
3015
Serhiy Storchakacb985562015-05-04 15:32:48 +03003016/*[clinic input]
3017_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018
Serhiy Storchakacb985562015-05-04 15:32:48 +03003019[clinic start generated code]*/
3020
3021static PyObject *
3022_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3023/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3024{
    /* Thin wrapper: forwards to treebuilder_done() and returns its
       result unchanged. */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003025 return treebuilder_done(self);
3026}
3027
Serhiy Storchakacb985562015-05-04 15:32:48 +03003028/*[clinic input]
3029_elementtree.TreeBuilder.start
3030
3031 tag: object
Shantanu4edc95c2020-03-01 22:33:24 -08003032 attrs: object(subclass_of='&PyDict_Type')
Serhiy Storchakacb985562015-05-04 15:32:48 +03003033 /
3034
3035[clinic start generated code]*/
3036
3037static PyObject *
3038_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3039 PyObject *attrs)
Shantanu4edc95c2020-03-01 22:33:24 -08003040/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041{
    /* Thin wrapper: forwards to treebuilder_handle_start() and returns
       its result unchanged. */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003042 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043}
3044
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045/* ==================================================================== */
3046/* the expat interface */
3047
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003049#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003050
3051/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3052 * cached globally without being in per-module state.
3053 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003054static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) selects member 'func' of the expat_capi table. */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056
/* Memory handling suite built from the PyObject_Malloc / PyObject_Realloc /
   PyObject_Free allocator functions. */
Eli Bendersky52467b12012-06-01 07:13:08 +03003057static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3058 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3059
/* State of an XMLParser object: the expat parser handle, the target
   object and per-event handler references. */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060typedef struct {
3061 PyObject_HEAD
3062
3063 XML_Parser parser;
3064
/* target: tested with TreeBuilder_CheckExact() to choose between calling
   the treebuilder_handle_* functions directly and calling a handle_*
   object.
   entity: mapping consulted by expat_default_handler() for "&name;"
   references. */
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003065 PyObject *target;
3066 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067
/* names: dict used by makeuniversal() as a cache from the raw name bytes
   to the decoded universal name. */
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003068 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069
/* handle_*: per-event handler objects; handle_data is called with one
   argument when the target is not an exact TreeBuilder and the handler is
   set.  NOTE(review): presumably these are looked up on the target;
   confirm in the parser's init code. */
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003070 PyObject *handle_start_ns;
3071 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003072 PyObject *handle_start;
3073 PyObject *handle_data;
3074 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003076 PyObject *handle_comment;
3077 PyObject *handle_pi;
3078 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003080 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003081
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003082} XMLParserObject;
3083
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003084/* helpers */
3085
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086LOCAL(PyObject*)
3087makeuniversal(XMLParserObject* self, const char* string)
3088{
3089 /* convert a UTF-8 tag/attribute name from the expat parser
3090 to a universal name string */
3091
Antoine Pitrouc1948842012-10-01 23:40:37 +02003092 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093 PyObject* key;
3094 PyObject* value;
3095
3096 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003097 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098 if (!key)
3099 return NULL;
3100
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003101 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102
3103 if (value) {
3104 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003105 }
3106 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003107 /* new name. convert to universal name, and decode as
3108 necessary */
3109
3110 PyObject* tag;
3111 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003112 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003113
3114 /* look for namespace separator */
3115 for (i = 0; i < size; i++)
3116 if (string[i] == '}')
3117 break;
3118 if (i != size) {
3119 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003120 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003121 if (tag == NULL) {
3122 Py_DECREF(key);
3123 return NULL;
3124 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003125 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003126 p[0] = '{';
3127 memcpy(p+1, string, size);
3128 size++;
3129 } else {
3130 /* plain name; use key as tag */
3131 Py_INCREF(key);
3132 tag = key;
3133 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003134
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003135 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003136 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003137 value = PyUnicode_DecodeUTF8(p, size, "strict");
3138 Py_DECREF(tag);
3139 if (!value) {
3140 Py_DECREF(key);
3141 return NULL;
3142 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003143
3144 /* add to names dictionary */
3145 if (PyDict_SetItem(self->names, key, value) < 0) {
3146 Py_DECREF(key);
3147 Py_DECREF(value);
3148 return NULL;
3149 }
3150 }
3151
3152 Py_DECREF(key);
3153 return value;
3154}
3155
Eli Bendersky5b77d812012-03-16 08:20:05 +02003156/* Set the ParseError exception with the given parameters.
3157 * If message is not NULL, it's used as the error string. Otherwise, the
3158 * message string is the default for the given error_code.
3159*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003160static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003161expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3162 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003163{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003164 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003165 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003166
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003167 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003168 message ? message : EXPAT(ErrorString)(error_code),
3169 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003170 if (errmsg == NULL)
3171 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003172
Petr Viktorinffd97532020-02-11 17:46:57 +01003173 error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003174 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003175 if (!error)
3176 return;
3177
Eli Bendersky5b77d812012-03-16 08:20:05 +02003178 /* Add code and position attributes */
3179 code = PyLong_FromLong((long)error_code);
3180 if (!code) {
3181 Py_DECREF(error);
3182 return;
3183 }
3184 if (PyObject_SetAttrString(error, "code", code) == -1) {
3185 Py_DECREF(error);
3186 Py_DECREF(code);
3187 return;
3188 }
3189 Py_DECREF(code);
3190
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003191 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003192 if (!position) {
3193 Py_DECREF(error);
3194 return;
3195 }
3196 if (PyObject_SetAttrString(error, "position", position) == -1) {
3197 Py_DECREF(error);
3198 Py_DECREF(position);
3199 return;
3200 }
3201 Py_DECREF(position);
3202
Eli Bendersky532d03e2013-08-10 08:00:39 -07003203 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003204 Py_DECREF(error);
3205}
3206
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207/* -------------------------------------------------------------------- */
3208/* handlers */
3209
3210static void
3211expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3212 int data_len)
3213{
3214 PyObject* key;
3215 PyObject* value;
3216 PyObject* res;
3217
3218 if (data_len < 2 || data_in[0] != '&')
3219 return;
3220
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003221 if (PyErr_Occurred())
3222 return;
3223
Neal Norwitz0269b912007-08-08 06:56:02 +00003224 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225 if (!key)
3226 return;
3227
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003228 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229
3230 if (value) {
3231 if (TreeBuilder_CheckExact(self->target))
3232 res = treebuilder_handle_data(
3233 (TreeBuilderObject*) self->target, value
3234 );
3235 else if (self->handle_data)
Petr Viktorinffd97532020-02-11 17:46:57 +01003236 res = PyObject_CallOneArg(self->handle_data, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237 else
3238 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003240 } else if (!PyErr_Occurred()) {
3241 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003242 char message[128] = "undefined entity ";
3243 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003244 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003245 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003247 EXPAT(GetErrorColumnNumber)(self->parser),
3248 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 );
3250 }
3251
3252 Py_DECREF(key);
3253}
3254
3255static void
3256expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3257 const XML_Char **attrib_in)
3258{
3259 PyObject* res;
3260 PyObject* tag;
3261 PyObject* attrib;
3262 int ok;
3263
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003264 if (PyErr_Occurred())
3265 return;
3266
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 /* tag name */
3268 tag = makeuniversal(self, tag_in);
3269 if (!tag)
3270 return; /* parser will look for errors */
3271
3272 /* attributes */
3273 if (attrib_in[0]) {
3274 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003275 if (!attrib) {
3276 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003278 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 while (attrib_in[0] && attrib_in[1]) {
3280 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003281 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 if (!key || !value) {
3283 Py_XDECREF(value);
3284 Py_XDECREF(key);
3285 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003286 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 return;
3288 }
3289 ok = PyDict_SetItem(attrib, key, value);
3290 Py_DECREF(value);
3291 Py_DECREF(key);
3292 if (ok < 0) {
3293 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003294 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295 return;
3296 }
3297 attrib_in += 2;
3298 }
3299 } else {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003300 attrib = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003301 }
3302
3303 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 /* shortcut */
3305 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3306 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003307 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003308 else if (self->handle_start) {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003309 if (attrib == NULL) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003310 attrib = PyDict_New();
3311 if (!attrib) {
3312 Py_DECREF(tag);
3313 return;
3314 }
3315 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003316 res = PyObject_CallFunctionObjArgs(self->handle_start,
3317 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003318 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 res = NULL;
3320
3321 Py_DECREF(tag);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003322 Py_XDECREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323
3324 Py_XDECREF(res);
3325}
3326
3327static void
3328expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3329 int data_len)
3330{
3331 PyObject* data;
3332 PyObject* res;
3333
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003334 if (PyErr_Occurred())
3335 return;
3336
Neal Norwitz0269b912007-08-08 06:56:02 +00003337 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003338 if (!data)
3339 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340
3341 if (TreeBuilder_CheckExact(self->target))
3342 /* shortcut */
3343 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3344 else if (self->handle_data)
Petr Viktorinffd97532020-02-11 17:46:57 +01003345 res = PyObject_CallOneArg(self->handle_data, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 else
3347 res = NULL;
3348
3349 Py_DECREF(data);
3350
3351 Py_XDECREF(res);
3352}
3353
3354static void
3355expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3356{
3357 PyObject* tag;
3358 PyObject* res = NULL;
3359
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003360 if (PyErr_Occurred())
3361 return;
3362
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003363 if (TreeBuilder_CheckExact(self->target))
3364 /* shortcut */
3365 /* the standard tree builder doesn't look at the end tag */
3366 res = treebuilder_handle_end(
3367 (TreeBuilderObject*) self->target, Py_None
3368 );
3369 else if (self->handle_end) {
3370 tag = makeuniversal(self, tag_in);
3371 if (tag) {
Petr Viktorinffd97532020-02-11 17:46:57 +01003372 res = PyObject_CallOneArg(self->handle_end, tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373 Py_DECREF(tag);
3374 }
3375 }
3376
3377 Py_XDECREF(res);
3378}
3379
3380static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003381expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3382 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003384 PyObject* res = NULL;
3385 PyObject* uri;
3386 PyObject* prefix;
3387 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003388
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003389 if (PyErr_Occurred())
3390 return;
3391
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003392 if (!uri_in)
3393 uri_in = "";
3394 if (!prefix_in)
3395 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003396
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003397 if (TreeBuilder_CheckExact(self->target)) {
3398 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3399 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003400
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003401 if (target->events_append && target->start_ns_event_obj) {
3402 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3403 if (!prefix)
3404 return;
3405 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3406 if (!uri) {
3407 Py_DECREF(prefix);
3408 return;
3409 }
3410
3411 res = treebuilder_handle_start_ns(target, prefix, uri);
3412 Py_DECREF(uri);
3413 Py_DECREF(prefix);
3414 }
3415 } else if (self->handle_start_ns) {
3416 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3417 if (!prefix)
3418 return;
3419 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3420 if (!uri) {
3421 Py_DECREF(prefix);
3422 return;
3423 }
3424
3425 stack[0] = prefix;
3426 stack[1] = uri;
3427 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3428 Py_DECREF(uri);
3429 Py_DECREF(prefix);
3430 }
3431
3432 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433}
3434
3435static void
3436expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3437{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003438 PyObject *res = NULL;
3439 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003440
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003441 if (PyErr_Occurred())
3442 return;
3443
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003444 if (!prefix_in)
3445 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003446
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003447 if (TreeBuilder_CheckExact(self->target)) {
3448 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3449 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3450
3451 if (target->events_append && target->end_ns_event_obj) {
3452 res = treebuilder_handle_end_ns(target, Py_None);
3453 }
3454 } else if (self->handle_end_ns) {
3455 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3456 if (!prefix)
3457 return;
3458
Petr Viktorinffd97532020-02-11 17:46:57 +01003459 res = PyObject_CallOneArg(self->handle_end_ns, prefix);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003460 Py_DECREF(prefix);
3461 }
3462
3463 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464}
3465
3466static void
3467expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3468{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003469 PyObject* comment;
3470 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003472 if (PyErr_Occurred())
3473 return;
3474
Stefan Behnel43851a22019-05-01 21:20:38 +02003475 if (TreeBuilder_CheckExact(self->target)) {
3476 /* shortcut */
3477 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3478
Neal Norwitz0269b912007-08-08 06:56:02 +00003479 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003480 if (!comment)
3481 return; /* parser will look for errors */
3482
3483 res = treebuilder_handle_comment(target, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003484 Py_XDECREF(res);
3485 Py_DECREF(comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02003486 } else if (self->handle_comment) {
3487 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3488 if (!comment)
3489 return;
3490
Petr Viktorinffd97532020-02-11 17:46:57 +01003491 res = PyObject_CallOneArg(self->handle_comment, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003492 Py_XDECREF(res);
3493 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494 }
3495}
3496
Eli Bendersky45839902013-01-13 05:14:47 -08003497static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003498expat_start_doctype_handler(XMLParserObject *self,
3499 const XML_Char *doctype_name,
3500 const XML_Char *sysid,
3501 const XML_Char *pubid,
3502 int has_internal_subset)
3503{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003504 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003505 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003506 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003507
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003508 if (PyErr_Occurred())
3509 return;
3510
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003511 doctype_name_obj = makeuniversal(self, doctype_name);
3512 if (!doctype_name_obj)
3513 return;
3514
3515 if (sysid) {
3516 sysid_obj = makeuniversal(self, sysid);
3517 if (!sysid_obj) {
3518 Py_DECREF(doctype_name_obj);
3519 return;
3520 }
3521 } else {
3522 Py_INCREF(Py_None);
3523 sysid_obj = Py_None;
3524 }
3525
3526 if (pubid) {
3527 pubid_obj = makeuniversal(self, pubid);
3528 if (!pubid_obj) {
3529 Py_DECREF(doctype_name_obj);
3530 Py_DECREF(sysid_obj);
3531 return;
3532 }
3533 } else {
3534 Py_INCREF(Py_None);
3535 pubid_obj = Py_None;
3536 }
3537
3538 /* If the target has a handler for doctype, call it. */
3539 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003540 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3541 doctype_name_obj, pubid_obj,
3542 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003543 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003544 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003545 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3546 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3547 "The doctype() method of XMLParser is ignored. "
3548 "Define doctype() method on the TreeBuilder target.",
3549 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003550 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003551 }
3552
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003553 Py_DECREF(doctype_name_obj);
3554 Py_DECREF(pubid_obj);
3555 Py_DECREF(sysid_obj);
3556}
3557
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558static void
3559expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3560 const XML_Char* data_in)
3561{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003562 PyObject* pi_target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003563 PyObject* data;
3564 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003565 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003566
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003567 if (PyErr_Occurred())
3568 return;
3569
Stefan Behnel43851a22019-05-01 21:20:38 +02003570 if (TreeBuilder_CheckExact(self->target)) {
3571 /* shortcut */
3572 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3573
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003574 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003575 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3576 if (!pi_target)
3577 goto error;
3578 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3579 if (!data)
3580 goto error;
3581 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582 Py_XDECREF(res);
3583 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003584 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003586 } else if (self->handle_pi) {
3587 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3588 if (!pi_target)
3589 goto error;
3590 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3591 if (!data)
3592 goto error;
3593
3594 stack[0] = pi_target;
3595 stack[1] = data;
3596 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3597 Py_XDECREF(res);
3598 Py_DECREF(data);
3599 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003601
3602 return;
3603
3604 error:
3605 Py_XDECREF(pi_target);
3606 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607}
3608
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610
Eli Bendersky52467b12012-06-01 07:13:08 +03003611static PyObject *
3612xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613{
Eli Bendersky52467b12012-06-01 07:13:08 +03003614 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3615 if (self) {
3616 self->parser = NULL;
3617 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003618 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003619 self->handle_start = self->handle_data = self->handle_end = NULL;
3620 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003621 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003622 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003623 return (PyObject *)self;
3624}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003625
scoderc8d8e152017-09-14 22:00:03 +02003626static int
3627ignore_attribute_error(PyObject *value)
3628{
3629 if (value == NULL) {
3630 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3631 return -1;
3632 }
3633 PyErr_Clear();
3634 }
3635 return 0;
3636}
3637
Serhiy Storchakacb985562015-05-04 15:32:48 +03003638/*[clinic input]
3639_elementtree.XMLParser.__init__
3640
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003641 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003642 target: object = NULL
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003643 encoding: str(accept={str, NoneType}) = None
Serhiy Storchakacb985562015-05-04 15:32:48 +03003644
3645[clinic start generated code]*/
3646
Eli Bendersky52467b12012-06-01 07:13:08 +03003647static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003648_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3649 const char *encoding)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003650/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003651{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003652 self->entity = PyDict_New();
3653 if (!self->entity)
3654 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655
Serhiy Storchakacb985562015-05-04 15:32:48 +03003656 self->names = PyDict_New();
3657 if (!self->names) {
3658 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003659 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003660 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003661
Serhiy Storchakacb985562015-05-04 15:32:48 +03003662 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3663 if (!self->parser) {
3664 Py_CLEAR(self->entity);
3665 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003667 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003669 /* expat < 2.1.0 has no XML_SetHashSalt() */
3670 if (EXPAT(SetHashSalt) != NULL) {
3671 EXPAT(SetHashSalt)(self->parser,
3672 (unsigned long)_Py_HashSecret.expat.hashsalt);
3673 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003674
Eli Bendersky52467b12012-06-01 07:13:08 +03003675 if (target) {
3676 Py_INCREF(target);
3677 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003678 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003680 Py_CLEAR(self->entity);
3681 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003682 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003684 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003685 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003687 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3688 if (ignore_attribute_error(self->handle_start_ns)) {
3689 return -1;
3690 }
3691 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3692 if (ignore_attribute_error(self->handle_end_ns)) {
3693 return -1;
3694 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003695 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003696 if (ignore_attribute_error(self->handle_start)) {
3697 return -1;
3698 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003699 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003700 if (ignore_attribute_error(self->handle_data)) {
3701 return -1;
3702 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003703 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003704 if (ignore_attribute_error(self->handle_end)) {
3705 return -1;
3706 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003707 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003708 if (ignore_attribute_error(self->handle_comment)) {
3709 return -1;
3710 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003711 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003712 if (ignore_attribute_error(self->handle_pi)) {
3713 return -1;
3714 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003715 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003716 if (ignore_attribute_error(self->handle_close)) {
3717 return -1;
3718 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003719 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003720 if (ignore_attribute_error(self->handle_doctype)) {
3721 return -1;
3722 }
Eli Bendersky45839902013-01-13 05:14:47 -08003723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003725 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003726 if (self->handle_start_ns || self->handle_end_ns)
3727 EXPAT(SetNamespaceDeclHandler)(
3728 self->parser,
3729 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3730 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3731 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003732 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003733 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734 (XML_StartElementHandler) expat_start_handler,
3735 (XML_EndElementHandler) expat_end_handler
3736 );
3737 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003738 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003739 (XML_DefaultHandler) expat_default_handler
3740 );
3741 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003742 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003743 (XML_CharacterDataHandler) expat_data_handler
3744 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003745 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003746 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003747 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003748 (XML_CommentHandler) expat_comment_handler
3749 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003750 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003751 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003752 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003753 (XML_ProcessingInstructionHandler) expat_pi_handler
3754 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003755 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003756 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003757 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3758 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003759 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003760 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003761 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003763
Eli Bendersky52467b12012-06-01 07:13:08 +03003764 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003765}
3766
Eli Bendersky52467b12012-06-01 07:13:08 +03003767static int
3768xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3769{
3770 Py_VISIT(self->handle_close);
3771 Py_VISIT(self->handle_pi);
3772 Py_VISIT(self->handle_comment);
3773 Py_VISIT(self->handle_end);
3774 Py_VISIT(self->handle_data);
3775 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003776 Py_VISIT(self->handle_start_ns);
3777 Py_VISIT(self->handle_end_ns);
3778 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003779
3780 Py_VISIT(self->target);
3781 Py_VISIT(self->entity);
3782 Py_VISIT(self->names);
3783
3784 return 0;
3785}
3786
3787static int
3788xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003789{
Victor Stinnere727d412017-09-18 05:29:37 -07003790 if (self->parser != NULL) {
3791 XML_Parser parser = self->parser;
3792 self->parser = NULL;
3793 EXPAT(ParserFree)(parser);
3794 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003795
Antoine Pitrouc1948842012-10-01 23:40:37 +02003796 Py_CLEAR(self->handle_close);
3797 Py_CLEAR(self->handle_pi);
3798 Py_CLEAR(self->handle_comment);
3799 Py_CLEAR(self->handle_end);
3800 Py_CLEAR(self->handle_data);
3801 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003802 Py_CLEAR(self->handle_start_ns);
3803 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003804 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003805
Antoine Pitrouc1948842012-10-01 23:40:37 +02003806 Py_CLEAR(self->target);
3807 Py_CLEAR(self->entity);
3808 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003809
Eli Bendersky52467b12012-06-01 07:13:08 +03003810 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003811}
3812
Eli Bendersky52467b12012-06-01 07:13:08 +03003813static void
3814xmlparser_dealloc(XMLParserObject* self)
3815{
3816 PyObject_GC_UnTrack(self);
3817 xmlparser_gc_clear(self);
3818 Py_TYPE(self)->tp_free((PyObject *)self);
3819}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003820
Oren Milman402e1cd2020-04-12 17:36:41 +03003821Py_LOCAL_INLINE(int)
3822_check_xmlparser(XMLParserObject* self)
3823{
3824 if (self->target == NULL) {
3825 PyErr_SetString(PyExc_ValueError,
3826 "XMLParser.__init__() wasn't called");
3827 return 0;
3828 }
3829 return 1;
3830}
3831
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003832LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003833expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003834{
3835 int ok;
3836
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003837 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003838 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3839
3840 if (PyErr_Occurred())
3841 return NULL;
3842
3843 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003844 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003845 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003846 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003847 EXPAT(GetErrorColumnNumber)(self->parser),
3848 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003849 );
3850 return NULL;
3851 }
3852
3853 Py_RETURN_NONE;
3854}
3855
Serhiy Storchakacb985562015-05-04 15:32:48 +03003856/*[clinic input]
3857_elementtree.XMLParser.close
3858
3859[clinic start generated code]*/
3860
3861static PyObject *
3862_elementtree_XMLParser_close_impl(XMLParserObject *self)
3863/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003864{
3865 /* end feeding data to parser */
3866
3867 PyObject* res;
Oren Milman402e1cd2020-04-12 17:36:41 +03003868
3869 if (!_check_xmlparser(self)) {
3870 return NULL;
3871 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003872 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003873 if (!res)
3874 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003875
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003876 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003877 Py_DECREF(res);
3878 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003879 }
3880 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003881 Py_DECREF(res);
Victor Stinner2ff58a22019-06-17 14:27:23 +02003882 return PyObject_CallNoArgs(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003883 }
3884 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003885 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003886 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003887}
3888
Serhiy Storchakacb985562015-05-04 15:32:48 +03003889/*[clinic input]
3890_elementtree.XMLParser.feed
3891
3892 data: object
3893 /
3894
3895[clinic start generated code]*/
3896
3897static PyObject *
3898_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3899/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003900{
3901 /* feed data to parser */
3902
Oren Milman402e1cd2020-04-12 17:36:41 +03003903 if (!_check_xmlparser(self)) {
3904 return NULL;
3905 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003906 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003907 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003908 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3909 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003910 return NULL;
3911 if (data_len > INT_MAX) {
3912 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3913 return NULL;
3914 }
3915 /* Explicitly set UTF-8 encoding. Return code ignored. */
3916 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003917 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003918 }
3919 else {
3920 Py_buffer view;
3921 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003922 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003923 return NULL;
3924 if (view.len > INT_MAX) {
3925 PyBuffer_Release(&view);
3926 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3927 return NULL;
3928 }
3929 res = expat_parse(self, view.buf, (int)view.len, 0);
3930 PyBuffer_Release(&view);
3931 return res;
3932 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003933}
3934
Serhiy Storchakacb985562015-05-04 15:32:48 +03003935/*[clinic input]
3936_elementtree.XMLParser._parse_whole
3937
3938 file: object
3939 /
3940
3941[clinic start generated code]*/
3942
3943static PyObject *
3944_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3945/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003946{
Eli Benderskya3699232013-05-19 18:47:23 -07003947 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003948 PyObject* reader;
3949 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003950 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003951 PyObject* res;
3952
Oren Milman402e1cd2020-04-12 17:36:41 +03003953 if (!_check_xmlparser(self)) {
3954 return NULL;
3955 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003956 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003957 if (!reader)
3958 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003959
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003960 /* read from open file object */
3961 for (;;) {
3962
3963 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3964
3965 if (!buffer) {
3966 /* read failed (e.g. due to KeyboardInterrupt) */
3967 Py_DECREF(reader);
3968 return NULL;
3969 }
3970
Eli Benderskyf996e772012-03-16 05:53:30 +02003971 if (PyUnicode_CheckExact(buffer)) {
3972 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003973 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003974 Py_DECREF(buffer);
3975 break;
3976 }
3977 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003978 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003979 if (!temp) {
3980 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003981 Py_DECREF(reader);
3982 return NULL;
3983 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003984 buffer = temp;
3985 }
3986 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003987 Py_DECREF(buffer);
3988 break;
3989 }
3990
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003991 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3992 Py_DECREF(buffer);
3993 Py_DECREF(reader);
3994 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3995 return NULL;
3996 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003997 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003998 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003999 );
4000
4001 Py_DECREF(buffer);
4002
4003 if (!res) {
4004 Py_DECREF(reader);
4005 return NULL;
4006 }
4007 Py_DECREF(res);
4008
4009 }
4010
4011 Py_DECREF(reader);
4012
4013 res = expat_parse(self, "", 0, 1);
4014
4015 if (res && TreeBuilder_CheckExact(self->target)) {
4016 Py_DECREF(res);
4017 return treebuilder_done((TreeBuilderObject*) self->target);
4018 }
4019
4020 return res;
4021}
4022
Serhiy Storchakacb985562015-05-04 15:32:48 +03004023/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03004024_elementtree.XMLParser._setevents
4025
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004026 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03004027 events_to_report: object = None
4028 /
4029
4030[clinic start generated code]*/
4031
4032static PyObject *
4033_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4034 PyObject *events_queue,
4035 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004036/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004037{
4038 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004039 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004040 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004041 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004042
Oren Milman402e1cd2020-04-12 17:36:41 +03004043 if (!_check_xmlparser(self)) {
4044 return NULL;
4045 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004046 if (!TreeBuilder_CheckExact(self->target)) {
4047 PyErr_SetString(
4048 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004049 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004050 "targets"
4051 );
4052 return NULL;
4053 }
4054
4055 target = (TreeBuilderObject*) self->target;
4056
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004057 events_append = PyObject_GetAttrString(events_queue, "append");
4058 if (events_append == NULL)
4059 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004060 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004061
4062 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004063 Py_CLEAR(target->start_event_obj);
4064 Py_CLEAR(target->end_event_obj);
4065 Py_CLEAR(target->start_ns_event_obj);
4066 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004067 Py_CLEAR(target->comment_event_obj);
4068 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004069
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004070 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004071 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004072 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004073 Py_RETURN_NONE;
4074 }
4075
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004076 if (!(events_seq = PySequence_Fast(events_to_report,
4077 "events must be a sequence"))) {
4078 return NULL;
4079 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004080
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004081 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004082 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004083 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004084 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004085 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004086 } else if (PyBytes_Check(event_name_obj)) {
4087 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004088 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004089 if (event_name == NULL) {
4090 Py_DECREF(events_seq);
4091 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4092 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004093 }
4094
4095 Py_INCREF(event_name_obj);
4096 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004097 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004098 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004099 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004100 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004101 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004102 EXPAT(SetNamespaceDeclHandler)(
4103 self->parser,
4104 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4105 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4106 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004107 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004108 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004109 EXPAT(SetNamespaceDeclHandler)(
4110 self->parser,
4111 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4112 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4113 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004114 } else if (strcmp(event_name, "comment") == 0) {
4115 Py_XSETREF(target->comment_event_obj, event_name_obj);
4116 EXPAT(SetCommentHandler)(
4117 self->parser,
4118 (XML_CommentHandler) expat_comment_handler
4119 );
4120 } else if (strcmp(event_name, "pi") == 0) {
4121 Py_XSETREF(target->pi_event_obj, event_name_obj);
4122 EXPAT(SetProcessingInstructionHandler)(
4123 self->parser,
4124 (XML_ProcessingInstructionHandler) expat_pi_handler
4125 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004126 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004127 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004128 Py_DECREF(events_seq);
4129 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004130 return NULL;
4131 }
4132 }
4133
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004134 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004135 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004136}
4137
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004138static PyMemberDef xmlparser_members[] = {
4139 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4140 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4141 {NULL}
4142};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004143
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004144static PyObject*
4145xmlparser_version_getter(XMLParserObject *self, void *closure)
4146{
4147 return PyUnicode_FromFormat(
4148 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4149 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004150}
4151
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004152static PyGetSetDef xmlparser_getsetlist[] = {
4153 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4154 {NULL},
4155};
4156
Serhiy Storchakacb985562015-05-04 15:32:48 +03004157#include "clinic/_elementtree.c.h"
4158
4159static PyMethodDef element_methods[] = {
4160
4161 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4162
4163 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4164 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4165
4166 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4167 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4168 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4169
4170 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4171 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4172 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4173 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4174
4175 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4176 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4177 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4178
Serhiy Storchakacb985562015-05-04 15:32:48 +03004179 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4180 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4181
4182 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4183
4184 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4185 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4186 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4187 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4188 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4189
4190 {NULL, NULL}
4191};
4192
4193static PyMappingMethods element_as_mapping = {
4194 (lenfunc) element_length,
4195 (binaryfunc) element_subscr,
4196 (objobjargproc) element_ass_subscr,
4197};
4198
Serhiy Storchakadde08152015-11-25 15:28:13 +02004199static PyGetSetDef element_getsetlist[] = {
4200 {"tag",
4201 (getter)element_tag_getter,
4202 (setter)element_tag_setter,
4203 "A string identifying what kind of data this element represents"},
4204 {"text",
4205 (getter)element_text_getter,
4206 (setter)element_text_setter,
4207 "A string of text directly after the start tag, or None"},
4208 {"tail",
4209 (getter)element_tail_getter,
4210 (setter)element_tail_setter,
4211 "A string of text directly after the end tag, or None"},
4212 {"attrib",
4213 (getter)element_attrib_getter,
4214 (setter)element_attrib_setter,
4215 "A dictionary containing the element's attributes"},
4216 {NULL},
4217};
4218
Serhiy Storchakacb985562015-05-04 15:32:48 +03004219static PyTypeObject Element_Type = {
4220 PyVarObject_HEAD_INIT(NULL, 0)
4221 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4222 /* methods */
4223 (destructor)element_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004224 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004225 0, /* tp_getattr */
4226 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004227 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004228 (reprfunc)element_repr, /* tp_repr */
4229 0, /* tp_as_number */
4230 &element_as_sequence, /* tp_as_sequence */
4231 &element_as_mapping, /* tp_as_mapping */
4232 0, /* tp_hash */
4233 0, /* tp_call */
4234 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004235 PyObject_GenericGetAttr, /* tp_getattro */
4236 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004237 0, /* tp_as_buffer */
4238 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4239 /* tp_flags */
4240 0, /* tp_doc */
4241 (traverseproc)element_gc_traverse, /* tp_traverse */
4242 (inquiry)element_gc_clear, /* tp_clear */
4243 0, /* tp_richcompare */
4244 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4245 0, /* tp_iter */
4246 0, /* tp_iternext */
4247 element_methods, /* tp_methods */
4248 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004249 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004250 0, /* tp_base */
4251 0, /* tp_dict */
4252 0, /* tp_descr_get */
4253 0, /* tp_descr_set */
4254 0, /* tp_dictoffset */
4255 (initproc)element_init, /* tp_init */
4256 PyType_GenericAlloc, /* tp_alloc */
4257 element_new, /* tp_new */
4258 0, /* tp_free */
4259};
4260
4261static PyMethodDef treebuilder_methods[] = {
4262 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4263 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4264 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004265 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4266 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004267 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4268 {NULL, NULL}
4269};
4270
4271static PyTypeObject TreeBuilder_Type = {
4272 PyVarObject_HEAD_INIT(NULL, 0)
4273 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4274 /* methods */
4275 (destructor)treebuilder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004276 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004277 0, /* tp_getattr */
4278 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004279 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004280 0, /* tp_repr */
4281 0, /* tp_as_number */
4282 0, /* tp_as_sequence */
4283 0, /* tp_as_mapping */
4284 0, /* tp_hash */
4285 0, /* tp_call */
4286 0, /* tp_str */
4287 0, /* tp_getattro */
4288 0, /* tp_setattro */
4289 0, /* tp_as_buffer */
4290 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4291 /* tp_flags */
4292 0, /* tp_doc */
4293 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4294 (inquiry)treebuilder_gc_clear, /* tp_clear */
4295 0, /* tp_richcompare */
4296 0, /* tp_weaklistoffset */
4297 0, /* tp_iter */
4298 0, /* tp_iternext */
4299 treebuilder_methods, /* tp_methods */
4300 0, /* tp_members */
4301 0, /* tp_getset */
4302 0, /* tp_base */
4303 0, /* tp_dict */
4304 0, /* tp_descr_get */
4305 0, /* tp_descr_set */
4306 0, /* tp_dictoffset */
4307 _elementtree_TreeBuilder___init__, /* tp_init */
4308 PyType_GenericAlloc, /* tp_alloc */
4309 treebuilder_new, /* tp_new */
4310 0, /* tp_free */
4311};
4312
4313static PyMethodDef xmlparser_methods[] = {
4314 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4315 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4316 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4317 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004318 {NULL, NULL}
4319};
4320
Neal Norwitz227b5332006-03-22 09:28:35 +00004321static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004322 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004323 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004324 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004325 (destructor)xmlparser_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004326 0, /* tp_vectorcall_offset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004327 0, /* tp_getattr */
4328 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004329 0, /* tp_as_async */
Eli Bendersky52467b12012-06-01 07:13:08 +03004330 0, /* tp_repr */
4331 0, /* tp_as_number */
4332 0, /* tp_as_sequence */
4333 0, /* tp_as_mapping */
4334 0, /* tp_hash */
4335 0, /* tp_call */
4336 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004337 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004338 0, /* tp_setattro */
4339 0, /* tp_as_buffer */
4340 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4341 /* tp_flags */
4342 0, /* tp_doc */
4343 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4344 (inquiry)xmlparser_gc_clear, /* tp_clear */
4345 0, /* tp_richcompare */
4346 0, /* tp_weaklistoffset */
4347 0, /* tp_iter */
4348 0, /* tp_iternext */
4349 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004350 xmlparser_members, /* tp_members */
4351 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004352 0, /* tp_base */
4353 0, /* tp_dict */
4354 0, /* tp_descr_get */
4355 0, /* tp_descr_set */
4356 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004357 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004358 PyType_GenericAlloc, /* tp_alloc */
4359 xmlparser_new, /* tp_new */
4360 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004361};
4362
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004363/* ==================================================================== */
4364/* python module interface */
4365
4366static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004367 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004368 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004369 {NULL, NULL}
4370};
4371
Martin v. Löwis1a214512008-06-11 05:26:20 +00004372
Eli Bendersky532d03e2013-08-10 08:00:39 -07004373static struct PyModuleDef elementtreemodule = {
4374 PyModuleDef_HEAD_INIT,
4375 "_elementtree",
4376 NULL,
4377 sizeof(elementtreestate),
4378 _functions,
4379 NULL,
4380 elementtree_traverse,
4381 elementtree_clear,
4382 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004383};
4384
Neal Norwitzf6657e62006-12-28 04:47:50 +00004385PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004386PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004387{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004388 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004389 elementtreestate *st;
4390
4391 m = PyState_FindModule(&elementtreemodule);
4392 if (m) {
4393 Py_INCREF(m);
4394 return m;
4395 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004396
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004397 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004398 if (PyType_Ready(&ElementIter_Type) < 0)
4399 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004400 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004401 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004402 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004403 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004404 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004405 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004406
Eli Bendersky532d03e2013-08-10 08:00:39 -07004407 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004408 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004409 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08004410 st = get_elementtree_state(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004411
Eli Bendersky828efde2012-04-05 05:40:58 +03004412 if (!(temp = PyImport_ImportModule("copy")))
4413 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004414 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004415 Py_XDECREF(temp);
4416
Victor Stinnerb136f112017-07-10 22:28:02 +02004417 if (st->deepcopy_obj == NULL) {
4418 return NULL;
4419 }
4420
4421 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004422 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004423 return NULL;
4424
Eli Bendersky20d41742012-06-01 09:48:37 +03004425 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004426 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4427 if (expat_capi) {
4428 /* check that it's usable */
4429 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004430 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004431 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4432 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004433 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004434 PyErr_SetString(PyExc_ImportError,
4435 "pyexpat version is incompatible");
4436 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004437 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004438 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004439 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004441
Eli Bendersky532d03e2013-08-10 08:00:39 -07004442 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004443 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004444 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004445 Py_INCREF(st->parseerror_obj);
Dong-hee Na016bdd52020-03-29 23:12:11 +09004446 if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4447 Py_DECREF(st->parseerror_obj);
4448 return NULL;
4449 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004450
Dong-hee Na016bdd52020-03-29 23:12:11 +09004451 PyTypeObject *types[] = {
4452 &Element_Type,
4453 &TreeBuilder_Type,
4454 &XMLParser_Type
4455 };
Eli Bendersky092af1f2012-03-04 07:14:03 +02004456
Dong-hee Na016bdd52020-03-29 23:12:11 +09004457 for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4458 if (PyModule_AddType(m, types[i]) < 0) {
4459 return NULL;
4460 }
4461 }
Eli Bendersky52467b12012-06-01 07:13:08 +03004462
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004463 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004464}