blob: 9dadeef712938430a6c4de6c57e44a2926c609f3 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
Miss Islington (bot)f7f1c262021-07-30 07:25:28 -07003 * See https://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020017#include "structmember.h" // PyMemberDef
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  Change the condition below to 1 to keep a
   running byte count in `memory` and print it on every ALLOC/RELEASE;
   otherwise both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private (static)
   function returning `type`; with MSVC it also adds __inline and
   __fastcall. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* The lowest bit of the text/tail pointers is used to store a 'join'
   flag, so every use of text or tail as an object pointer must go
   through JOIN_OBJ.  See the comments in the ElementObject definition
   for more info.

     JOIN_OBJ(ptr)        the pointer with the flag bit masked off
     JOIN_GET(ptr)        the flag bit stored in the pointer (0 or 1)
     JOIN_SET(ptr, flag)  the object pointer of ptr combined with flag */
#define JOIN_OBJ(ptr) ((PyObject*) ((uintptr_t) (ptr) & ~(uintptr_t)1))
#define JOIN_GET(ptr) ((uintptr_t) (ptr) & 1)
#define JOIN_SET(ptr, flag) ((void*) ((uintptr_t) (JOIN_OBJ(ptr)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +020095 PyObject *comment_factory;
96 PyObject *pi_factory;
Eli Bendersky532d03e2013-08-10 08:00:39 -070097} elementtreestate;
98
99static struct PyModuleDef elementtreemodule;
100
101/* Given a module object (assumed to be _elementtree), get its per-module
102 * state.
103 */
Hai Shif707d942020-03-16 21:15:01 +0800104static inline elementtreestate*
105get_elementtree_state(PyObject *module)
106{
107 void *state = PyModule_GetState(module);
108 assert(state != NULL);
109 return (elementtreestate *)state;
110}
Eli Bendersky532d03e2013-08-10 08:00:39 -0700111
/* Look up the module instance imported in the currently running
 * sub-interpreter (via PyState_FindModule) and yield its state.
 */
#define ET_STATE_GLOBAL                                         \
    ((elementtreestate *) PyModule_GetState(                    \
        PyState_FindModule(&elementtreemodule)))
117
118static int
119elementtree_clear(PyObject *m)
120{
Hai Shif707d942020-03-16 21:15:01 +0800121 elementtreestate *st = get_elementtree_state(m);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700122 Py_CLEAR(st->parseerror_obj);
123 Py_CLEAR(st->deepcopy_obj);
124 Py_CLEAR(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200125 Py_CLEAR(st->comment_factory);
126 Py_CLEAR(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700127 return 0;
128}
129
130static int
131elementtree_traverse(PyObject *m, visitproc visit, void *arg)
132{
Hai Shif707d942020-03-16 21:15:01 +0800133 elementtreestate *st = get_elementtree_state(m);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700134 Py_VISIT(st->parseerror_obj);
135 Py_VISIT(st->deepcopy_obj);
136 Py_VISIT(st->elementpath_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +0200137 Py_VISIT(st->comment_factory);
138 Py_VISIT(st->pi_factory);
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 return 0;
140}
141
142static void
143elementtree_free(void *m)
144{
145 elementtree_clear((PyObject *)m);
146}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000147
148/* helpers */
149
150LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151list_join(PyObject* list)
152{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300153 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000154 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155 PyObject* result;
156
Antoine Pitrouc1948842012-10-01 23:40:37 +0200157 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 if (!joiner)
159 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162 return result;
163}
164
Eli Bendersky48d358b2012-05-30 17:57:50 +0300165/* Is the given object an empty dictionary?
166*/
167static int
168is_empty_dict(PyObject *obj)
169{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200170 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300171}
172
173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200175/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000176
177typedef struct {
178
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200179 /* attributes (a dictionary object), or NULL if no attributes */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000180 PyObject* attrib;
181
182 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200183 Py_ssize_t length; /* actual number of items */
184 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000185
186 /* this either points to _children or to a malloced buffer */
187 PyObject* *children;
188
189 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100190
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000191} ElementObjectExtra;
192
193typedef struct {
194 PyObject_HEAD
195
196 /* element tag (a string). */
197 PyObject* tag;
198
199 /* text before first child. note that this is a tagged pointer;
200 use JOIN_OBJ to get the object pointer. the join flag is used
201 to distinguish lists created by the tree builder from lists
202 assigned to the attribute by application code; the former
203 should be joined before being returned to the user, the latter
204 should be left intact. */
205 PyObject* text;
206
207 /* text after this element, in parent. note that this is a tagged
208 pointer; use JOIN_OBJ to get the object pointer. */
209 PyObject* tail;
210
211 ElementObjectExtra* extra;
212
Eli Benderskyebf37a22012-04-03 22:02:37 +0300213 PyObject *weakreflist; /* For tp_weaklistoffset */
214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215} ElementObject;
216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000217
/* Element_CheckExact: op's type is exactly Element_Type.
   Element_Check: op passes PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op)  Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op)       PyObject_TypeCheck(op, &Element_Type)
220
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200223/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227{
228 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200229 if (!self->extra) {
230 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000231 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200232 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000233
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200234 Py_XINCREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000235 self->extra->attrib = attrib;
236
237 self->extra->length = 0;
238 self->extra->allocated = STATIC_CHILDREN;
239 self->extra->children = self->extra->_children;
240
241 return 0;
242}
243
244LOCAL(void)
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300245dealloc_extra(ElementObjectExtra *extra)
246{
247 Py_ssize_t i;
248
249 if (!extra)
250 return;
251
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200252 Py_XDECREF(extra->attrib);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300253
254 for (i = 0; i < extra->length; i++)
255 Py_DECREF(extra->children[i]);
256
257 if (extra->children != extra->_children)
258 PyObject_Free(extra->children);
259
260 PyObject_Free(extra);
261}
262
263LOCAL(void)
264clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000265{
Eli Bendersky08b85292012-04-04 15:55:07 +0300266 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 if (!self->extra)
269 return;
270
271 /* Avoid DECREFs calling into this code again (cycles, etc.)
272 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300273 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 self->extra = NULL;
275
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300276 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000277}
278
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279/* Convenience internal function to create new Element objects with the given
280 * tag and attributes.
281*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200283create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284{
285 ElementObject* self;
286
Eli Bendersky0192ba32012-03-30 16:38:33 +0300287 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 if (self == NULL)
289 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 self->extra = NULL;
291
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000292 Py_INCREF(tag);
293 self->tag = tag;
294
295 Py_INCREF(Py_None);
296 self->text = Py_None;
297
298 Py_INCREF(Py_None);
299 self->tail = Py_None;
300
Eli Benderskyebf37a22012-04-03 22:02:37 +0300301 self->weakreflist = NULL;
302
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200303 ALLOC(sizeof(ElementObject), "create element");
304 PyObject_GC_Track(self);
305
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200306 if (attrib != NULL && !is_empty_dict(attrib)) {
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200308 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200309 return NULL;
310 }
311 }
312
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000313 return (PyObject*) self;
314}
315
Eli Bendersky092af1f2012-03-04 07:14:03 +0200316static PyObject *
317element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
318{
319 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
320 if (e != NULL) {
321 Py_INCREF(Py_None);
322 e->tag = Py_None;
323
324 Py_INCREF(Py_None);
325 e->text = Py_None;
326
327 Py_INCREF(Py_None);
328 e->tail = Py_None;
329
330 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300331 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200332 }
333 return (PyObject *)e;
334}
335
Eli Bendersky737b1732012-05-29 06:02:56 +0300336/* Helper function for extracting the attrib dictionary from a keywords dict.
337 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800338 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700340 *
341 * Return a dictionary with the content of kwds merged into the content of
342 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300343 */
344static PyObject*
345get_attrib_from_keywords(PyObject *kwds)
346{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700347 PyObject *attrib_str = PyUnicode_FromString("attrib");
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600348 if (attrib_str == NULL) {
349 return NULL;
350 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200351 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352
353 if (attrib) {
354 /* If attrib was found in kwds, copy its value and remove it from
355 * kwds
356 */
357 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700358 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300359 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
360 Py_TYPE(attrib)->tp_name);
361 return NULL;
362 }
363 attrib = PyDict_Copy(attrib);
Serhiy Storchaka8905fcc2018-12-11 08:38:03 +0200364 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
365 Py_DECREF(attrib);
366 attrib = NULL;
367 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200368 }
369 else if (!PyErr_Occurred()) {
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 attrib = PyDict_New();
371 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700372
373 Py_DECREF(attrib_str);
374
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600375 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
376 Py_DECREF(attrib);
377 return NULL;
378 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300379 return attrib;
380}
381
Serhiy Storchakacb985562015-05-04 15:32:48 +0300382/*[clinic input]
383module _elementtree
384class _elementtree.Element "ElementObject *" "&Element_Type"
385class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
386class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
387[clinic start generated code]*/
388/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
389
Eli Bendersky092af1f2012-03-04 07:14:03 +0200390static int
391element_init(PyObject *self, PyObject *args, PyObject *kwds)
392{
393 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200394 PyObject *attrib = NULL;
395 ElementObject *self_elem;
396
397 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
398 return -1;
399
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 if (attrib) {
401 /* attrib passed as positional arg */
402 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (!attrib)
404 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300405 if (kwds) {
406 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300408 return -1;
409 }
410 }
411 } else if (kwds) {
412 /* have keywords args */
413 attrib = get_attrib_from_keywords(kwds);
414 if (!attrib)
415 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 }
417
418 self_elem = (ElementObject *)self;
419
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200422 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423 return -1;
424 }
425 }
426
Eli Bendersky48d358b2012-05-30 17:57:50 +0300427 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200428 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429
430 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300432 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200433
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300435 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436
Eli Bendersky092af1f2012-03-04 07:14:03 +0200437 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300438 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200439
440 return 0;
441}
442
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200446 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 PyObject* *children;
448
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300449 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000450 /* make sure self->children can hold the given number of extra
451 elements. set an exception and return -1 if allocation failed */
452
Victor Stinner5f0af232013-07-11 23:01:36 +0200453 if (!self->extra) {
454 if (create_extra(self, NULL) < 0)
455 return -1;
456 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000457
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200458 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000459
460 if (size > self->extra->allocated) {
461 /* use Python 2.4's list growth strategy */
462 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100464 * which needs at least 4 bytes.
465 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000466 * be safe.
467 */
468 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200469 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
470 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000471 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000472 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100473 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000474 * false alarm always assume at least one child to be safe.
475 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000476 children = PyObject_Realloc(self->extra->children,
477 size * sizeof(PyObject*));
478 if (!children)
479 goto nomemory;
480 } else {
481 children = PyObject_Malloc(size * sizeof(PyObject*));
482 if (!children)
483 goto nomemory;
484 /* copy existing children from static area to malloc buffer */
485 memcpy(children, self->extra->children,
486 self->extra->length * sizeof(PyObject*));
487 }
488 self->extra->children = children;
489 self->extra->allocated = size;
490 }
491
492 return 0;
493
494 nomemory:
495 PyErr_NoMemory();
496 return -1;
497}
498
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300499LOCAL(void)
500raise_type_error(PyObject *element)
501{
502 PyErr_Format(PyExc_TypeError,
503 "expected an Element, not \"%.200s\"",
504 Py_TYPE(element)->tp_name);
505}
506
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000507LOCAL(int)
508element_add_subelement(ElementObject* self, PyObject* element)
509{
510 /* add a child element to a parent */
511
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300512 if (!Element_Check(element)) {
513 raise_type_error(element);
514 return -1;
515 }
516
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000517 if (element_resize(self, 1) < 0)
518 return -1;
519
520 Py_INCREF(element);
521 self->extra->children[self->extra->length] = element;
522
523 self->extra->length++;
524
525 return 0;
526}
527
528LOCAL(PyObject*)
529element_get_attrib(ElementObject* self)
530{
531 /* return borrowed reference to attrib dictionary */
532 /* note: this function assumes that the extra section exists */
533
534 PyObject* res = self->extra->attrib;
535
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200536 if (!res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537 /* create missing dictionary */
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200538 res = self->extra->attrib = PyDict_New();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000539 }
540
541 return res;
542}
543
544LOCAL(PyObject*)
545element_get_text(ElementObject* self)
546{
547 /* return borrowed reference to text attribute */
548
Serhiy Storchaka576def02017-03-30 09:47:31 +0300549 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550
551 if (JOIN_GET(res)) {
552 res = JOIN_OBJ(res);
553 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300554 PyObject *tmp = list_join(res);
555 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000556 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300557 self->text = tmp;
558 Py_DECREF(res);
559 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000560 }
561 }
562
563 return res;
564}
565
566LOCAL(PyObject*)
567element_get_tail(ElementObject* self)
568{
569 /* return borrowed reference to text attribute */
570
Serhiy Storchaka576def02017-03-30 09:47:31 +0300571 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000572
573 if (JOIN_GET(res)) {
574 res = JOIN_OBJ(res);
575 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300576 PyObject *tmp = list_join(res);
577 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300579 self->tail = tmp;
580 Py_DECREF(res);
581 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 }
583 }
584
585 return res;
586}
587
588static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300589subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590{
591 PyObject* elem;
592
593 ElementObject* parent;
594 PyObject* tag;
595 PyObject* attrib = NULL;
596 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
597 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800598 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800600 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 if (attrib) {
603 /* attrib passed as positional arg */
604 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000605 if (!attrib)
606 return NULL;
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -0600607 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
608 Py_DECREF(attrib);
609 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300610 }
611 } else if (kwds) {
612 /* have keyword args */
613 attrib = get_attrib_from_keywords(kwds);
614 if (!attrib)
615 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300617 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000618 }
619
Eli Bendersky092af1f2012-03-04 07:14:03 +0200620 elem = create_new_element(tag, attrib);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200621 Py_XDECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200622 if (elem == NULL)
623 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000624
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000625 if (element_add_subelement(parent, elem) < 0) {
626 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000628 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000629
630 return elem;
631}
632
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633static int
634element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
635{
636 Py_VISIT(self->tag);
637 Py_VISIT(JOIN_OBJ(self->text));
638 Py_VISIT(JOIN_OBJ(self->tail));
639
640 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200641 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 Py_VISIT(self->extra->attrib);
643
644 for (i = 0; i < self->extra->length; ++i)
645 Py_VISIT(self->extra->children[i]);
646 }
647 return 0;
648}
649
650static int
651element_gc_clear(ElementObject *self)
652{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300653 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700654 _clear_joined_ptr(&self->text);
655 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300656
657 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300658 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300660 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300661 return 0;
662}
663
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000664static void
665element_dealloc(ElementObject* self)
666{
INADA Naokia6296d32017-08-24 14:55:17 +0900667 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300668 PyObject_GC_UnTrack(self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200669 Py_TRASHCAN_BEGIN(self, element_dealloc)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670
671 if (self->weakreflist != NULL)
672 PyObject_ClearWeakRefs((PyObject *) self);
673
Eli Bendersky0192ba32012-03-30 16:38:33 +0300674 /* element_gc_clear clears all references and deallocates extra
675 */
676 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000677
678 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200679 Py_TYPE(self)->tp_free((PyObject *)self);
Jeroen Demeyer351c6742019-05-10 19:21:11 +0200680 Py_TRASHCAN_END
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681}
682
683/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685/*[clinic input]
686_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688 subelement: object(subclass_of='&Element_Type')
689 /
690
691[clinic start generated code]*/
692
693static PyObject *
694_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
695/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
696{
697 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698 return NULL;
699
700 Py_RETURN_NONE;
701}
702
Serhiy Storchakacb985562015-05-04 15:32:48 +0300703/*[clinic input]
704_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706[clinic start generated code]*/
707
708static PyObject *
709_elementtree_Element_clear_impl(ElementObject *self)
710/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
711{
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300712 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300715 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716
717 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300718 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719
720 Py_RETURN_NONE;
721}
722
Serhiy Storchakacb985562015-05-04 15:32:48 +0300723/*[clinic input]
724_elementtree.Element.__copy__
725
726[clinic start generated code]*/
727
728static PyObject *
729_elementtree_Element___copy___impl(ElementObject *self)
730/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000731{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200732 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000733 ElementObject* element;
734
Eli Bendersky092af1f2012-03-04 07:14:03 +0200735 element = (ElementObject*) create_new_element(
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200736 self->tag, self->extra ? self->extra->attrib : NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 if (!element)
738 return NULL;
739
Oren Milman39ecb9c2017-10-10 23:26:24 +0300740 Py_INCREF(JOIN_OBJ(self->text));
741 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742
Oren Milman39ecb9c2017-10-10 23:26:24 +0300743 Py_INCREF(JOIN_OBJ(self->tail));
744 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300746 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000747 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000748 if (element_resize(element, self->extra->length) < 0) {
749 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000751 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752
753 for (i = 0; i < self->extra->length; i++) {
754 Py_INCREF(self->extra->children[i]);
755 element->extra->children[i] = self->extra->children[i];
756 }
757
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300758 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000760 }
761
762 return (PyObject*) element;
763}
764
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200765/* Helper for a deep copy. */
766LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
767
Serhiy Storchakacb985562015-05-04 15:32:48 +0300768/*[clinic input]
769_elementtree.Element.__deepcopy__
770
Oren Milmand0568182017-09-12 17:39:15 +0300771 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300772 /
773
774[clinic start generated code]*/
775
776static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300777_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
778/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000779{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200780 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000781 ElementObject* element;
782 PyObject* tag;
783 PyObject* attrib;
784 PyObject* text;
785 PyObject* tail;
786 PyObject* id;
787
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000788 tag = deepcopy(self->tag, memo);
789 if (!tag)
790 return NULL;
791
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200792 if (self->extra && self->extra->attrib) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000793 attrib = deepcopy(self->extra->attrib, memo);
794 if (!attrib) {
795 Py_DECREF(tag);
796 return NULL;
797 }
798 } else {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200799 attrib = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 }
801
Eli Bendersky092af1f2012-03-04 07:14:03 +0200802 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 Py_DECREF(tag);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200805 Py_XDECREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806
807 if (!element)
808 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100809
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000810 text = deepcopy(JOIN_OBJ(self->text), memo);
811 if (!text)
812 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300813 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000814
815 tail = deepcopy(JOIN_OBJ(self->tail), memo);
816 if (!tail)
817 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300818 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300820 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000821 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000822 if (element_resize(element, self->extra->length) < 0)
823 goto error;
824
825 for (i = 0; i < self->extra->length; i++) {
826 PyObject* child = deepcopy(self->extra->children[i], memo);
Serhiy Storchakaf081fd82018-10-19 12:12:57 +0300827 if (!child || !Element_Check(child)) {
828 if (child) {
829 raise_type_error(child);
830 Py_DECREF(child);
831 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000832 element->extra->length = i;
833 goto error;
834 }
835 element->extra->children[i] = child;
836 }
837
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300838 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000839 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000840 }
841
842 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700843 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000844 if (!id)
845 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000846
847 i = PyDict_SetItem(memo, id, (PyObject*) element);
848
849 Py_DECREF(id);
850
851 if (i < 0)
852 goto error;
853
854 return (PyObject*) element;
855
856 error:
857 Py_DECREF(element);
858 return NULL;
859}
860
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200861LOCAL(PyObject *)
862deepcopy(PyObject *object, PyObject *memo)
863{
864 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200865 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200866 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200867
868 /* Fast paths */
869 if (object == Py_None || PyUnicode_CheckExact(object)) {
870 Py_INCREF(object);
871 return object;
872 }
873
874 if (Py_REFCNT(object) == 1) {
875 if (PyDict_CheckExact(object)) {
876 PyObject *key, *value;
877 Py_ssize_t pos = 0;
878 int simple = 1;
879 while (PyDict_Next(object, &pos, &key, &value)) {
880 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
881 simple = 0;
882 break;
883 }
884 }
885 if (simple)
886 return PyDict_Copy(object);
887 /* Fall through to general case */
888 }
889 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300890 return _elementtree_Element___deepcopy___impl(
891 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200892 }
893 }
894
895 /* General case */
896 st = ET_STATE_GLOBAL;
897 if (!st->deepcopy_obj) {
898 PyErr_SetString(PyExc_RuntimeError,
899 "deepcopy helper not found");
900 return NULL;
901 }
902
Victor Stinner7fbac452016-08-20 01:34:44 +0200903 stack[0] = object;
904 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200905 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200906}
907
908
Serhiy Storchakacb985562015-05-04 15:32:48 +0300909/*[clinic input]
910_elementtree.Element.__sizeof__ -> Py_ssize_t
911
912[clinic start generated code]*/
913
914static Py_ssize_t
915_elementtree_Element___sizeof___impl(ElementObject *self)
916/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200917{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200918 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200919 if (self->extra) {
920 result += sizeof(ElementObjectExtra);
921 if (self->extra->children != self->extra->_children)
922 result += sizeof(PyObject*) * self->extra->allocated;
923 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300924 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200925}
926
Eli Bendersky698bdb22013-01-10 06:01:06 -0800927/* dict keys for getstate/setstate. */
928#define PICKLED_TAG "tag"
929#define PICKLED_CHILDREN "_children"
930#define PICKLED_ATTRIB "attrib"
931#define PICKLED_TAIL "tail"
932#define PICKLED_TEXT "text"
933
934/* __getstate__ returns a fabricated instance dict as in the pure-Python
935 * Element implementation, for interoperability/interchangeability. This
936 * makes the pure-Python implementation details an API, but (a) there aren't
937 * any unnecessary structures there; and (b) it buys compatibility with 3.2
938 * pickles. See issue #16076.
939 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300940/*[clinic input]
941_elementtree.Element.__getstate__
942
943[clinic start generated code]*/
944
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300946_elementtree_Element___getstate___impl(ElementObject *self)
947/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800948{
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200949 Py_ssize_t i;
950 PyObject *children, *attrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800951
952 /* Build a list of children. */
953 children = PyList_New(self->extra ? self->extra->length : 0);
954 if (!children)
955 return NULL;
956 for (i = 0; i < PyList_GET_SIZE(children); i++) {
957 PyObject *child = self->extra->children[i];
958 Py_INCREF(child);
959 PyList_SET_ITEM(children, i, child);
960 }
961
Serhiy Storchakadccd41e2020-03-09 15:12:41 +0200962 if (self->extra && self->extra->attrib) {
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200963 attrib = self->extra->attrib;
964 Py_INCREF(attrib);
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800965 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966 else {
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200967 attrib = PyDict_New();
968 if (!attrib) {
969 Py_DECREF(children);
970 return NULL;
971 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972 }
Serhiy Storchaka88944a42020-03-09 14:37:08 +0200973
974 return Py_BuildValue("{sOsNsNsOsO}",
975 PICKLED_TAG, self->tag,
976 PICKLED_CHILDREN, children,
977 PICKLED_ATTRIB, attrib,
978 PICKLED_TEXT, JOIN_OBJ(self->text),
979 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800980}
981
982static PyObject *
983element_setstate_from_attributes(ElementObject *self,
984 PyObject *tag,
985 PyObject *attrib,
986 PyObject *text,
987 PyObject *tail,
988 PyObject *children)
989{
990 Py_ssize_t i, nchildren;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +0300991 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800992
993 if (!tag) {
994 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
995 return NULL;
996 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800997
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200998 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300999 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001000
Oren Milman39ecb9c2017-10-10 23:26:24 +03001001 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
1002 Py_INCREF(JOIN_OBJ(text));
1003 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004
Oren Milman39ecb9c2017-10-10 23:26:24 +03001005 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
1006 Py_INCREF(JOIN_OBJ(tail));
1007 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008
1009 /* Handle ATTRIB and CHILDREN. */
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001010 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001011 Py_RETURN_NONE;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001012 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013
1014 /* Compute 'nchildren'. */
1015 if (children) {
1016 if (!PyList_Check(children)) {
1017 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1018 return NULL;
1019 }
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001020 nchildren = PyList_GET_SIZE(children);
1021
1022 /* (Re-)allocate 'extra'.
1023 Avoid DECREFs calling into this code again (cycles, etc.)
1024 */
1025 oldextra = self->extra;
1026 self->extra = NULL;
1027 if (element_resize(self, nchildren)) {
1028 assert(!self->extra || !self->extra->length);
1029 clear_extra(self);
1030 self->extra = oldextra;
1031 return NULL;
1032 }
1033 assert(self->extra);
1034 assert(self->extra->allocated >= nchildren);
1035 if (oldextra) {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001036 assert(self->extra->attrib == NULL);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001037 self->extra->attrib = oldextra->attrib;
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001038 oldextra->attrib = NULL;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001039 }
1040
1041 /* Copy children */
1042 for (i = 0; i < nchildren; i++) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001043 PyObject *child = PyList_GET_ITEM(children, i);
1044 if (!Element_Check(child)) {
1045 raise_type_error(child);
1046 self->extra->length = i;
1047 dealloc_extra(oldextra);
1048 return NULL;
1049 }
1050 Py_INCREF(child);
1051 self->extra->children[i] = child;
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001052 }
1053
1054 assert(!self->extra->length);
1055 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001056 }
1057 else {
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001058 if (element_resize(self, 0)) {
1059 return NULL;
1060 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001061 }
1062
Eli Bendersky698bdb22013-01-10 06:01:06 -08001063 /* Stash attrib. */
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001064 Py_XINCREF(attrib);
1065 Py_XSETREF(self->extra->attrib, attrib);
Serhiy Storchaka6f906b32018-10-18 09:49:54 +03001066 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001067
1068 Py_RETURN_NONE;
1069}
1070
1071/* __setstate__ for Element instance from the Python implementation.
1072 * 'state' should be the instance dict.
1073 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001074
Eli Bendersky698bdb22013-01-10 06:01:06 -08001075static PyObject *
1076element_setstate_from_Python(ElementObject *self, PyObject *state)
1077{
1078 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1079 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1080 PyObject *args;
1081 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001082 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001083
Eli Bendersky698bdb22013-01-10 06:01:06 -08001084 tag = attrib = text = tail = children = NULL;
1085 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001086 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001087 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001088
1089 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1090 &attrib, &text, &tail, &children))
1091 retval = element_setstate_from_attributes(self, tag, attrib, text,
1092 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001093 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001094 retval = NULL;
1095
1096 Py_DECREF(args);
1097 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001098}
1099
Serhiy Storchakacb985562015-05-04 15:32:48 +03001100/*[clinic input]
1101_elementtree.Element.__setstate__
1102
1103 state: object
1104 /
1105
1106[clinic start generated code]*/
1107
Eli Bendersky698bdb22013-01-10 06:01:06 -08001108static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001109_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1110/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001111{
1112 if (!PyDict_CheckExact(state)) {
1113 PyErr_Format(PyExc_TypeError,
1114 "Don't know how to unpickle \"%.200R\" as an Element",
1115 state);
1116 return NULL;
1117 }
1118 else
1119 return element_setstate_from_Python(self, state);
1120}
1121
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001122LOCAL(int)
1123checkpath(PyObject* tag)
1124{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001125 Py_ssize_t i;
1126 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001127
1128 /* check if a tag contains an xpath character */
1129
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130#define PATHCHAR(ch) \
1131 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001133 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001135 const void *data = PyUnicode_DATA(tag);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001136 unsigned int kind = PyUnicode_KIND(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001137 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1138 PyUnicode_READ(kind, data, 1) == '}' || (
1139 PyUnicode_READ(kind, data, 1) == '*' &&
1140 PyUnicode_READ(kind, data, 2) == '}'))) {
1141 /* wildcard: '{}tag' or '{*}tag' */
1142 return 1;
1143 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 for (i = 0; i < len; i++) {
1145 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1146 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001148 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001149 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001151 return 1;
1152 }
1153 return 0;
1154 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001155 if (PyBytes_Check(tag)) {
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001156 const char *p = PyBytes_AS_STRING(tag);
Stefan Behnel47541682019-05-03 20:58:16 +02001157 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1158 if (len >= 3 && p[0] == '{' && (
Stefan Behnel6b951492019-05-06 17:36:35 +02001159 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
Stefan Behnel47541682019-05-03 20:58:16 +02001160 /* wildcard: '{}tag' or '{*}tag' */
1161 return 1;
1162 }
1163 for (i = 0; i < len; i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001164 if (p[i] == '{')
1165 check = 0;
1166 else if (p[i] == '}')
1167 check = 1;
1168 else if (check && PATHCHAR(p[i]))
1169 return 1;
1170 }
1171 return 0;
1172 }
1173
1174 return 1; /* unknown type; might be path expression */
1175}
1176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177/*[clinic input]
1178_elementtree.Element.extend
1179
1180 elements: object
1181 /
1182
1183[clinic start generated code]*/
1184
1185static PyObject *
1186_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1187/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188{
1189 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001191
Serhiy Storchakacb985562015-05-04 15:32:48 +03001192 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193 if (!seq) {
1194 PyErr_Format(
1195 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001197 );
1198 return NULL;
1199 }
1200
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001201 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001202 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001203 Py_INCREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001204 if (element_add_subelement(self, element) < 0) {
1205 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001206 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001207 return NULL;
1208 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001209 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001210 }
1211
1212 Py_DECREF(seq);
1213
1214 Py_RETURN_NONE;
1215}
1216
Serhiy Storchakacb985562015-05-04 15:32:48 +03001217/*[clinic input]
1218_elementtree.Element.find
1219
1220 path: object
1221 namespaces: object = None
1222
1223[clinic start generated code]*/
1224
1225static PyObject *
1226_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1227 PyObject *namespaces)
1228/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001230 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001231 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001232
Serhiy Storchakacb985562015-05-04 15:32:48 +03001233 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001234 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001235 return _PyObject_CallMethodIdObjArgs(
1236 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001238 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239
1240 if (!self->extra)
1241 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001242
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001243 for (i = 0; i < self->extra->length; i++) {
1244 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001246 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001248 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 Py_DECREF(item);
1252 if (rc < 0)
1253 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 }
1255
1256 Py_RETURN_NONE;
1257}
1258
Serhiy Storchakacb985562015-05-04 15:32:48 +03001259/*[clinic input]
1260_elementtree.Element.findtext
1261
1262 path: object
1263 default: object = None
1264 namespaces: object = None
1265
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1270 PyObject *default_value,
1271 PyObject *namespaces)
1272/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001274 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001275 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001276 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277
Serhiy Storchakacb985562015-05-04 15:32:48 +03001278 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001279 return _PyObject_CallMethodIdObjArgs(
1280 st->elementpath_obj, &PyId_findtext,
1281 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 );
1283
1284 if (!self->extra) {
1285 Py_INCREF(default_value);
1286 return default_value;
1287 }
1288
1289 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001290 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001291 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001292 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001293 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001294 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 if (rc > 0) {
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001296 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 if (text == Py_None) {
1298 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001299 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001300 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001301 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 return text;
1304 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001305 Py_DECREF(item);
1306 if (rc < 0)
1307 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001308 }
1309
1310 Py_INCREF(default_value);
1311 return default_value;
1312}
1313
Serhiy Storchakacb985562015-05-04 15:32:48 +03001314/*[clinic input]
1315_elementtree.Element.findall
1316
1317 path: object
1318 namespaces: object = None
1319
1320[clinic start generated code]*/
1321
1322static PyObject *
1323_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1324 PyObject *namespaces)
1325/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001326{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001327 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001328 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001329 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001330
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001331 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001332 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001333 return _PyObject_CallMethodIdObjArgs(
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001334 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001335 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001336 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001337
1338 out = PyList_New(0);
1339 if (!out)
1340 return NULL;
1341
1342 if (!self->extra)
1343 return out;
1344
1345 for (i = 0; i < self->extra->length; i++) {
1346 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001347 int rc;
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001348 assert(Element_Check(item));
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001349 Py_INCREF(item);
Serhiy Storchakab11c5662018-10-14 10:32:19 +03001350 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001351 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1352 Py_DECREF(item);
1353 Py_DECREF(out);
1354 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001355 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001356 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001357 }
1358
1359 return out;
1360}
1361
Serhiy Storchakacb985562015-05-04 15:32:48 +03001362/*[clinic input]
1363_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001364
Serhiy Storchakacb985562015-05-04 15:32:48 +03001365 path: object
1366 namespaces: object = None
1367
1368[clinic start generated code]*/
1369
1370static PyObject *
1371_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1372 PyObject *namespaces)
1373/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1374{
1375 PyObject* tag = path;
1376 _Py_IDENTIFIER(iterfind);
1377 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001378
Victor Stinnerf5616342016-12-09 15:26:00 +01001379 return _PyObject_CallMethodIdObjArgs(
1380 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001381}
1382
Serhiy Storchakacb985562015-05-04 15:32:48 +03001383/*[clinic input]
1384_elementtree.Element.get
1385
1386 key: object
1387 default: object = None
1388
1389[clinic start generated code]*/
1390
1391static PyObject *
1392_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1393 PyObject *default_value)
1394/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001395{
Miss Islington (bot)beb83422021-12-05 11:04:52 -08001396 if (self->extra && self->extra->attrib) {
1397 PyObject *attrib = self->extra->attrib;
1398 Py_INCREF(attrib);
1399 PyObject *value = PyDict_GetItemWithError(attrib, key);
1400 Py_XINCREF(value);
1401 Py_DECREF(attrib);
1402 if (value != NULL || PyErr_Occurred()) {
1403 return value;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001404 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001405 }
1406
Miss Islington (bot)beb83422021-12-05 11:04:52 -08001407 Py_INCREF(default_value);
1408 return default_value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409}
1410
Eli Bendersky64d11e62012-06-15 07:42:50 +03001411static PyObject *
1412create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1413
1414
Serhiy Storchakacb985562015-05-04 15:32:48 +03001415/*[clinic input]
1416_elementtree.Element.iter
1417
1418 tag: object = None
1419
1420[clinic start generated code]*/
1421
Eli Bendersky64d11e62012-06-15 07:42:50 +03001422static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001423_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1424/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001425{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001426 if (PyUnicode_Check(tag)) {
1427 if (PyUnicode_READY(tag) < 0)
1428 return NULL;
1429 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1430 tag = Py_None;
1431 }
1432 else if (PyBytes_Check(tag)) {
1433 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1434 tag = Py_None;
1435 }
1436
Eli Bendersky64d11e62012-06-15 07:42:50 +03001437 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001438}
1439
1440
Serhiy Storchakacb985562015-05-04 15:32:48 +03001441/*[clinic input]
1442_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001443
Serhiy Storchakacb985562015-05-04 15:32:48 +03001444[clinic start generated code]*/
1445
1446static PyObject *
1447_elementtree_Element_itertext_impl(ElementObject *self)
1448/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1449{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001450 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001451}
1452
Eli Bendersky64d11e62012-06-15 07:42:50 +03001453
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001454static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001455element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001457 ElementObject* self = (ElementObject*) self_;
1458
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459 if (!self->extra || index < 0 || index >= self->extra->length) {
1460 PyErr_SetString(
1461 PyExc_IndexError,
1462 "child index out of range"
1463 );
1464 return NULL;
1465 }
1466
1467 Py_INCREF(self->extra->children[index]);
1468 return self->extra->children[index];
1469}
1470
Serhiy Storchakacb985562015-05-04 15:32:48 +03001471/*[clinic input]
1472_elementtree.Element.insert
1473
1474 index: Py_ssize_t
1475 subelement: object(subclass_of='&Element_Type')
1476 /
1477
1478[clinic start generated code]*/
1479
1480static PyObject *
1481_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1482 PyObject *subelement)
1483/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001485 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486
Victor Stinner5f0af232013-07-11 23:01:36 +02001487 if (!self->extra) {
1488 if (create_extra(self, NULL) < 0)
1489 return NULL;
1490 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001491
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001492 if (index < 0) {
1493 index += self->extra->length;
1494 if (index < 0)
1495 index = 0;
1496 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497 if (index > self->extra->length)
1498 index = self->extra->length;
1499
1500 if (element_resize(self, 1) < 0)
1501 return NULL;
1502
1503 for (i = self->extra->length; i > index; i--)
1504 self->extra->children[i] = self->extra->children[i-1];
1505
Serhiy Storchakacb985562015-05-04 15:32:48 +03001506 Py_INCREF(subelement);
1507 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001508
1509 self->extra->length++;
1510
1511 Py_RETURN_NONE;
1512}
1513
Serhiy Storchakacb985562015-05-04 15:32:48 +03001514/*[clinic input]
1515_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516
Serhiy Storchakacb985562015-05-04 15:32:48 +03001517[clinic start generated code]*/
1518
1519static PyObject *
1520_elementtree_Element_items_impl(ElementObject *self)
1521/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1522{
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001523 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524 return PyList_New(0);
1525
1526 return PyDict_Items(self->extra->attrib);
1527}
1528
Serhiy Storchakacb985562015-05-04 15:32:48 +03001529/*[clinic input]
1530_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001531
Serhiy Storchakacb985562015-05-04 15:32:48 +03001532[clinic start generated code]*/
1533
1534static PyObject *
1535_elementtree_Element_keys_impl(ElementObject *self)
1536/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1537{
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001538 if (!self->extra || !self->extra->attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539 return PyList_New(0);
1540
1541 return PyDict_Keys(self->extra->attrib);
1542}
1543
Martin v. Löwis18e16552006-02-15 17:27:45 +00001544static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545element_length(ElementObject* self)
1546{
1547 if (!self->extra)
1548 return 0;
1549
1550 return self->extra->length;
1551}
1552
Serhiy Storchakacb985562015-05-04 15:32:48 +03001553/*[clinic input]
1554_elementtree.Element.makeelement
1555
1556 tag: object
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001557 attrib: object(subclass_of='&PyDict_Type')
Serhiy Storchakacb985562015-05-04 15:32:48 +03001558 /
1559
1560[clinic start generated code]*/
1561
1562static PyObject *
1563_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1564 PyObject *attrib)
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02001565/*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566{
1567 PyObject* elem;
1568
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 attrib = PyDict_Copy(attrib);
1570 if (!attrib)
1571 return NULL;
1572
Eli Bendersky092af1f2012-03-04 07:14:03 +02001573 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574
1575 Py_DECREF(attrib);
1576
1577 return elem;
1578}
1579
Serhiy Storchakacb985562015-05-04 15:32:48 +03001580/*[clinic input]
1581_elementtree.Element.remove
1582
1583 subelement: object(subclass_of='&Element_Type')
1584 /
1585
1586[clinic start generated code]*/
1587
1588static PyObject *
1589_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1590/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001592 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001593 int rc;
1594 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596 if (!self->extra) {
1597 /* element has no children, so raise exception */
1598 PyErr_SetString(
1599 PyExc_ValueError,
1600 "list.remove(x): x not in list"
1601 );
1602 return NULL;
1603 }
1604
1605 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001606 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001608 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001609 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001611 if (rc < 0)
1612 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 }
1614
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001615 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001616 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617 PyErr_SetString(
1618 PyExc_ValueError,
1619 "list.remove(x): x not in list"
1620 );
1621 return NULL;
1622 }
1623
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001624 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001625
1626 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627 for (; i < self->extra->length; i++)
1628 self->extra->children[i] = self->extra->children[i+1];
1629
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001630 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631 Py_RETURN_NONE;
1632}
1633
1634static PyObject*
1635element_repr(ElementObject* self)
1636{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001637 int status;
1638
1639 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001640 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001641
1642 status = Py_ReprEnter((PyObject *)self);
1643 if (status == 0) {
1644 PyObject *res;
1645 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1646 Py_ReprLeave((PyObject *)self);
1647 return res;
1648 }
1649 if (status > 0)
1650 PyErr_Format(PyExc_RuntimeError,
1651 "reentrant call inside %s.__repr__",
1652 Py_TYPE(self)->tp_name);
1653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001654}
1655
Serhiy Storchakacb985562015-05-04 15:32:48 +03001656/*[clinic input]
1657_elementtree.Element.set
1658
1659 key: object
1660 value: object
1661 /
1662
1663[clinic start generated code]*/
1664
1665static PyObject *
1666_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1667 PyObject *value)
1668/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669{
1670 PyObject* attrib;
1671
Victor Stinner5f0af232013-07-11 23:01:36 +02001672 if (!self->extra) {
1673 if (create_extra(self, NULL) < 0)
1674 return NULL;
1675 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001676
1677 attrib = element_get_attrib(self);
1678 if (!attrib)
1679 return NULL;
1680
1681 if (PyDict_SetItem(attrib, key, value) < 0)
1682 return NULL;
1683
1684 Py_RETURN_NONE;
1685}
1686
1687static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001688element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001689{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001690 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001691 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692 PyObject* old;
1693
1694 if (!self->extra || index < 0 || index >= self->extra->length) {
1695 PyErr_SetString(
1696 PyExc_IndexError,
1697 "child assignment index out of range");
1698 return -1;
1699 }
1700
1701 old = self->extra->children[index];
1702
1703 if (item) {
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001704 if (!Element_Check(item)) {
1705 raise_type_error(item);
1706 return -1;
1707 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001708 Py_INCREF(item);
1709 self->extra->children[index] = item;
1710 } else {
1711 self->extra->length--;
1712 for (i = index; i < self->extra->length; i++)
1713 self->extra->children[i] = self->extra->children[i+1];
1714 }
1715
1716 Py_DECREF(old);
1717
1718 return 0;
1719}
1720
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001721static PyObject*
1722element_subscr(PyObject* self_, PyObject* item)
1723{
1724 ElementObject* self = (ElementObject*) self_;
1725
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726 if (PyIndex_Check(item)) {
1727 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001728
1729 if (i == -1 && PyErr_Occurred()) {
1730 return NULL;
1731 }
1732 if (i < 0 && self->extra)
1733 i += self->extra->length;
1734 return element_getitem(self_, i);
1735 }
1736 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001737 Py_ssize_t start, stop, step, slicelen, i;
1738 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001739 PyObject* list;
1740
1741 if (!self->extra)
1742 return PyList_New(0);
1743
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001744 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745 return NULL;
1746 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001747 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1748 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001749
1750 if (slicelen <= 0)
1751 return PyList_New(0);
1752 else {
1753 list = PyList_New(slicelen);
1754 if (!list)
1755 return NULL;
1756
1757 for (cur = start, i = 0; i < slicelen;
1758 cur += step, i++) {
1759 PyObject* item = self->extra->children[cur];
1760 Py_INCREF(item);
1761 PyList_SET_ITEM(list, i, item);
1762 }
1763
1764 return list;
1765 }
1766 }
1767 else {
1768 PyErr_SetString(PyExc_TypeError,
1769 "element indices must be integers");
1770 return NULL;
1771 }
1772}
1773
1774static int
1775element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1776{
1777 ElementObject* self = (ElementObject*) self_;
1778
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779 if (PyIndex_Check(item)) {
1780 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001781
1782 if (i == -1 && PyErr_Occurred()) {
1783 return -1;
1784 }
1785 if (i < 0 && self->extra)
1786 i += self->extra->length;
1787 return element_setitem(self_, i, value);
1788 }
1789 else if (PySlice_Check(item)) {
Zackery Spytz14514d92019-05-17 01:13:03 -06001790 Py_ssize_t start, stop, step, slicelen, newlen, i;
1791 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001792
1793 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001794 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795
Victor Stinner5f0af232013-07-11 23:01:36 +02001796 if (!self->extra) {
1797 if (create_extra(self, NULL) < 0)
1798 return -1;
1799 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001800
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001801 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802 return -1;
1803 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001804 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1805 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001806
Eli Bendersky865756a2012-03-09 13:38:15 +02001807 if (value == NULL) {
1808 /* Delete slice */
1809 size_t cur;
1810 Py_ssize_t i;
1811
1812 if (slicelen <= 0)
1813 return 0;
1814
1815 /* Since we're deleting, the direction of the range doesn't matter,
1816 * so for simplicity make it always ascending.
1817 */
1818 if (step < 0) {
1819 stop = start + 1;
1820 start = stop + step * (slicelen - 1) - 1;
1821 step = -step;
1822 }
1823
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001824 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001825
1826 /* recycle is a list that will contain all the children
1827 * scheduled for removal.
1828 */
1829 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001830 return -1;
1831 }
1832
1833 /* This loop walks over all the children that have to be deleted,
1834 * with cur pointing at them. num_moved is the amount of children
1835 * until the next deleted child that have to be "shifted down" to
1836 * occupy the deleted's places.
1837 * Note that in the ith iteration, shifting is done i+i places down
1838 * because i children were already removed.
1839 */
1840 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1841 /* Compute how many children have to be moved, clipping at the
1842 * list end.
1843 */
1844 Py_ssize_t num_moved = step - 1;
1845 if (cur + step >= (size_t)self->extra->length) {
1846 num_moved = self->extra->length - cur - 1;
1847 }
1848
1849 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1850
1851 memmove(
1852 self->extra->children + cur - i,
1853 self->extra->children + cur + 1,
1854 num_moved * sizeof(PyObject *));
1855 }
1856
1857 /* Leftover "tail" after the last removed child */
1858 cur = start + (size_t)slicelen * step;
1859 if (cur < (size_t)self->extra->length) {
1860 memmove(
1861 self->extra->children + cur - slicelen,
1862 self->extra->children + cur,
1863 (self->extra->length - cur) * sizeof(PyObject *));
1864 }
1865
1866 self->extra->length -= slicelen;
1867
1868 /* Discard the recycle list with all the deleted sub-elements */
Zackery Spytz9f3ed3e2018-10-23 13:28:06 -06001869 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001870 return 0;
1871 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001872
1873 /* A new slice is actually being assigned */
1874 seq = PySequence_Fast(value, "");
1875 if (!seq) {
1876 PyErr_Format(
1877 PyExc_TypeError,
1878 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1879 );
1880 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001882 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883
1884 if (step != 1 && newlen != slicelen)
1885 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001886 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001888 "attempt to assign sequence of size %zd "
1889 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001890 newlen, slicelen
1891 );
1892 return -1;
1893 }
1894
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001895 /* Resize before creating the recycle bin, to prevent refleaks. */
1896 if (newlen > slicelen) {
1897 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001898 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001899 return -1;
1900 }
1901 }
1902
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03001903 for (i = 0; i < newlen; i++) {
1904 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1905 if (!Element_Check(element)) {
1906 raise_type_error(element);
1907 Py_DECREF(seq);
1908 return -1;
1909 }
1910 }
1911
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001912 if (slicelen > 0) {
1913 /* to avoid recursive calls to this method (via decref), move
1914 old items to the recycle bin here, and get rid of them when
1915 we're done modifying the element */
1916 recycle = PyList_New(slicelen);
1917 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001918 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919 return -1;
1920 }
1921 for (cur = start, i = 0; i < slicelen;
1922 cur += step, i++)
1923 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1924 }
1925
1926 if (newlen < slicelen) {
1927 /* delete slice */
1928 for (i = stop; i < self->extra->length; i++)
1929 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1930 } else if (newlen > slicelen) {
1931 /* insert slice */
1932 for (i = self->extra->length-1; i >= stop; i--)
1933 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1934 }
1935
1936 /* replace the slice */
1937 for (cur = start, i = 0; i < newlen;
1938 cur += step, i++) {
1939 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1940 Py_INCREF(element);
1941 self->extra->children[cur] = element;
1942 }
1943
1944 self->extra->length += newlen - slicelen;
1945
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001946 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001947
1948 /* discard the recycle bin, and everything in it */
1949 Py_XDECREF(recycle);
1950
1951 return 0;
1952 }
1953 else {
1954 PyErr_SetString(PyExc_TypeError,
1955 "element indices must be integers");
1956 return -1;
1957 }
1958}
1959
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001960static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001961element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001963 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001964 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001965 return res;
1966}
1967
Serhiy Storchakadde08152015-11-25 15:28:13 +02001968static PyObject*
1969element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001970{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001971 PyObject *res = element_get_text(self);
1972 Py_XINCREF(res);
1973 return res;
1974}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001975
Serhiy Storchakadde08152015-11-25 15:28:13 +02001976static PyObject*
1977element_tail_getter(ElementObject *self, void *closure)
1978{
1979 PyObject *res = element_get_tail(self);
1980 Py_XINCREF(res);
1981 return res;
1982}
1983
1984static PyObject*
1985element_attrib_getter(ElementObject *self, void *closure)
1986{
1987 PyObject *res;
1988 if (!self->extra) {
1989 if (create_extra(self, NULL) < 0)
1990 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001991 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001992 res = element_get_attrib(self);
1993 Py_XINCREF(res);
1994 return res;
1995}
Victor Stinner4d463432013-07-11 23:05:03 +02001996
/* Macro for setter validation: a setter receives V == NULL when the
   attribute is being deleted, which is rejected with AttributeError by
   returning -1 from the *enclosing* function.  Wrapped in do/while(0) so
   that the expansion is a single statement and cannot capture a following
   "else"; invoke it with a trailing semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2005
Serhiy Storchakadde08152015-11-25 15:28:13 +02002006static int
2007element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2008{
2009 _VALIDATE_ATTR_VALUE(value);
2010 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002011 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002012 return 0;
2013}
2014
2015static int
2016element_text_setter(ElementObject *self, PyObject *value, void *closure)
2017{
2018 _VALIDATE_ATTR_VALUE(value);
2019 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002020 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002021 return 0;
2022}
2023
2024static int
2025element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2026{
2027 _VALIDATE_ATTR_VALUE(value);
2028 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002029 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002030 return 0;
2031}
2032
2033static int
2034element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2035{
2036 _VALIDATE_ATTR_VALUE(value);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002037 if (!PyDict_Check(value)) {
2038 PyErr_Format(PyExc_TypeError,
2039 "attrib must be dict, not %.200s",
Victor Stinner8182cc22020-07-10 12:40:38 +02002040 Py_TYPE(value)->tp_name);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002041 return -1;
2042 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002043 if (!self->extra) {
2044 if (create_extra(self, NULL) < 0)
2045 return -1;
2046 }
2047 Py_INCREF(value);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002048 Py_XSETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002049 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002050}
2051
2052static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002053 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002054 0, /* sq_concat */
2055 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002056 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002057 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002058 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002059 0,
2060};
2061
Eli Bendersky64d11e62012-06-15 07:42:50 +03002062/******************************* Element iterator ****************************/
2063
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
2072typedef struct ParentLocator_t {
2073 ElementObject *parent;
2074 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002075} ParentLocator;
2076
2077typedef struct {
2078 PyObject_HEAD
2079 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002080 Py_ssize_t parent_stack_used;
2081 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082 ElementObject *root_element;
2083 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002084 int gettext;
2085} ElementIterObject;
2086
2087
2088static void
2089elementiter_dealloc(ElementIterObject *it)
2090{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002091 Py_ssize_t i = it->parent_stack_used;
2092 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002093 /* bpo-31095: UnTrack is needed before calling any callbacks */
2094 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002095 while (i--)
2096 Py_XDECREF(it->parent_stack[i].parent);
2097 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098
2099 Py_XDECREF(it->sought_tag);
2100 Py_XDECREF(it->root_element);
2101
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102 PyObject_GC_Del(it);
2103}
2104
2105static int
2106elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2107{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002108 Py_ssize_t i = it->parent_stack_used;
2109 while (i--)
2110 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111
2112 Py_VISIT(it->root_element);
2113 Py_VISIT(it->sought_tag);
2114 return 0;
2115}
2116
2117/* Helper function for elementiter_next. Add a new parent to the parent stack.
2118 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002119static int
2120parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002122 ParentLocator *item;
2123
2124 if (it->parent_stack_used >= it->parent_stack_size) {
2125 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2126 ParentLocator *parent_stack = it->parent_stack;
2127 PyMem_Resize(parent_stack, ParentLocator, new_size);
2128 if (parent_stack == NULL)
2129 return -1;
2130 it->parent_stack = parent_stack;
2131 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002132 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002133 item = it->parent_stack + it->parent_stack_used++;
2134 Py_INCREF(parent);
2135 item->parent = parent;
2136 item->child_index = 0;
2137 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002138}
2139
2140static PyObject *
2141elementiter_next(ElementIterObject *it)
2142{
2143 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002144 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002145 * A short note on gettext: this function serves both the iter() and
2146 * itertext() methods to avoid code duplication. However, there are a few
2147 * small differences in the way these iterations work. Namely:
2148 * - itertext() only yields text from nodes that have it, and continues
2149 * iterating when a node doesn't have text (so it doesn't return any
2150 * node like iter())
2151 * - itertext() also has to handle tail, after finishing with all the
2152 * children of a node.
2153 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002154 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002155 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002156 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002157
2158 while (1) {
2159 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002160 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002161 * iterator is exhausted.
2162 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002163 if (!it->parent_stack_used) {
2164 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002165 PyErr_SetNone(PyExc_StopIteration);
2166 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002167 }
2168
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002169 elem = it->root_element; /* steals a reference */
2170 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002171 }
2172 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002173 /* See if there are children left to traverse in the current parent. If
2174 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002175 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2177 Py_ssize_t child_index = item->child_index;
2178 ElementObjectExtra *extra;
2179 elem = item->parent;
2180 extra = elem->extra;
2181 if (!extra || child_index >= extra->length) {
2182 it->parent_stack_used--;
2183 /* Note that extra condition on it->parent_stack_used here;
2184 * this is because itertext() is supposed to only return *inner*
2185 * text, not text following the element it began iteration with.
2186 */
2187 if (it->gettext && it->parent_stack_used) {
2188 text = element_get_tail(elem);
2189 goto gettext;
2190 }
2191 Py_DECREF(elem);
2192 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002193 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002194
Serhiy Storchakaf081fd82018-10-19 12:12:57 +03002195 assert(Element_Check(extra->children[child_index]));
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002196 elem = (ElementObject *)extra->children[child_index];
2197 item->child_index++;
2198 Py_INCREF(elem);
2199 }
2200
2201 if (parent_stack_push_new(it, elem) < 0) {
2202 Py_DECREF(elem);
2203 PyErr_NoMemory();
2204 return NULL;
2205 }
2206 if (it->gettext) {
2207 text = element_get_text(elem);
2208 goto gettext;
2209 }
2210
2211 if (it->sought_tag == Py_None)
2212 return (PyObject *)elem;
2213
2214 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2215 if (rc > 0)
2216 return (PyObject *)elem;
2217
2218 Py_DECREF(elem);
2219 if (rc < 0)
2220 return NULL;
2221 continue;
2222
2223gettext:
2224 if (!text) {
2225 Py_DECREF(elem);
2226 return NULL;
2227 }
2228 if (text == Py_None) {
2229 Py_DECREF(elem);
2230 }
2231 else {
2232 Py_INCREF(text);
2233 Py_DECREF(elem);
2234 rc = PyObject_IsTrue(text);
2235 if (rc > 0)
2236 return text;
2237 Py_DECREF(text);
2238 if (rc < 0)
2239 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002240 }
2241 }
2242
2243 return NULL;
2244}
2245
2246
2247static PyTypeObject ElementIter_Type = {
2248 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002249 /* Using the module's name since the pure-Python implementation does not
2250 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002251 "_elementtree._element_iterator", /* tp_name */
2252 sizeof(ElementIterObject), /* tp_basicsize */
2253 0, /* tp_itemsize */
2254 /* methods */
2255 (destructor)elementiter_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002256 0, /* tp_vectorcall_offset */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 0, /* tp_getattr */
2258 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02002259 0, /* tp_as_async */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002260 0, /* tp_repr */
2261 0, /* tp_as_number */
2262 0, /* tp_as_sequence */
2263 0, /* tp_as_mapping */
2264 0, /* tp_hash */
2265 0, /* tp_call */
2266 0, /* tp_str */
2267 0, /* tp_getattro */
2268 0, /* tp_setattro */
2269 0, /* tp_as_buffer */
2270 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2271 0, /* tp_doc */
2272 (traverseproc)elementiter_traverse, /* tp_traverse */
2273 0, /* tp_clear */
2274 0, /* tp_richcompare */
2275 0, /* tp_weaklistoffset */
2276 PyObject_SelfIter, /* tp_iter */
2277 (iternextfunc)elementiter_next, /* tp_iternext */
2278 0, /* tp_methods */
2279 0, /* tp_members */
2280 0, /* tp_getset */
2281 0, /* tp_base */
2282 0, /* tp_dict */
2283 0, /* tp_descr_get */
2284 0, /* tp_descr_set */
2285 0, /* tp_dictoffset */
2286 0, /* tp_init */
2287 0, /* tp_alloc */
2288 0, /* tp_new */
2289};
2290
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002291#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002292
2293static PyObject *
2294create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2295{
2296 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002297
2298 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2299 if (!it)
2300 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002301
Victor Stinner4d463432013-07-11 23:05:03 +02002302 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002303 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002304 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002305 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002306 it->root_element = self;
2307
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002308 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002309 if (it->parent_stack == NULL) {
2310 Py_DECREF(it);
2311 PyErr_NoMemory();
2312 return NULL;
2313 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002314 it->parent_stack_used = 0;
2315 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002316
Victor Stinner1b184552019-10-08 00:09:31 +02002317 PyObject_GC_Track(it);
2318
Eli Bendersky64d11e62012-06-15 07:42:50 +03002319 return (PyObject *)it;
2320}
2321
2322
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323/* ==================================================================== */
2324/* the tree builder type */
2325
2326typedef struct {
2327 PyObject_HEAD
2328
Eli Bendersky58d548d2012-05-29 15:45:16 +03002329 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002330
Antoine Pitrouee329312012-10-04 19:53:29 +02002331 PyObject *this; /* current node */
2332 PyObject *last; /* most recently created node */
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002333 PyObject *last_for_tail; /* most recently created node that takes a tail */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334
Eli Bendersky58d548d2012-05-29 15:45:16 +03002335 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002336
Eli Bendersky58d548d2012-05-29 15:45:16 +03002337 PyObject *stack; /* element stack */
2338 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339
Eli Bendersky48d358b2012-05-30 17:57:50 +03002340 PyObject *element_factory;
Stefan Behnel43851a22019-05-01 21:20:38 +02002341 PyObject *comment_factory;
2342 PyObject *pi_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002344 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002345 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002346 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2347 PyObject *end_event_obj;
2348 PyObject *start_ns_event_obj;
2349 PyObject *end_ns_event_obj;
Stefan Behnel43851a22019-05-01 21:20:38 +02002350 PyObject *comment_event_obj;
2351 PyObject *pi_event_obj;
2352
2353 char insert_comments;
2354 char insert_pis;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355} TreeBuilderObject;
2356
Andy Lesterdffe4c02020-03-04 07:15:20 -06002357#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358
2359/* -------------------------------------------------------------------- */
2360/* constructor and destructor */
2361
Eli Bendersky58d548d2012-05-29 15:45:16 +03002362static PyObject *
2363treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002364{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002365 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2366 if (t != NULL) {
2367 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002368
Eli Bendersky58d548d2012-05-29 15:45:16 +03002369 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002370 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002371 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002372 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373
Eli Bendersky58d548d2012-05-29 15:45:16 +03002374 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002375 t->element_factory = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002376 t->comment_factory = NULL;
2377 t->pi_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002378 t->stack = PyList_New(20);
2379 if (!t->stack) {
2380 Py_DECREF(t->this);
2381 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002382 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002383 return NULL;
2384 }
2385 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002387 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002388 t->start_event_obj = t->end_event_obj = NULL;
2389 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
Stefan Behnel43851a22019-05-01 21:20:38 +02002390 t->comment_event_obj = t->pi_event_obj = NULL;
2391 t->insert_comments = t->insert_pis = 0;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002392 }
2393 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394}
2395
Serhiy Storchakacb985562015-05-04 15:32:48 +03002396/*[clinic input]
2397_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002398
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002399 element_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002400 *
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002401 comment_factory: object = None
2402 pi_factory: object = None
Stefan Behnel43851a22019-05-01 21:20:38 +02002403 insert_comments: bool = False
2404 insert_pis: bool = False
Serhiy Storchakacb985562015-05-04 15:32:48 +03002405
2406[clinic start generated code]*/
2407
2408static int
2409_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
Stefan Behnel43851a22019-05-01 21:20:38 +02002410 PyObject *element_factory,
2411 PyObject *comment_factory,
2412 PyObject *pi_factory,
2413 int insert_comments, int insert_pis)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002414/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
Serhiy Storchakacb985562015-05-04 15:32:48 +03002415{
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002416 if (element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002417 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002418 Py_XSETREF(self->element_factory, element_factory);
Stefan Behnel43851a22019-05-01 21:20:38 +02002419 } else {
2420 Py_CLEAR(self->element_factory);
2421 }
2422
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002423 if (comment_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002424 elementtreestate *st = ET_STATE_GLOBAL;
2425 comment_factory = st->comment_factory;
2426 }
2427 if (comment_factory) {
2428 Py_INCREF(comment_factory);
2429 Py_XSETREF(self->comment_factory, comment_factory);
2430 self->insert_comments = insert_comments;
2431 } else {
2432 Py_CLEAR(self->comment_factory);
2433 self->insert_comments = 0;
2434 }
2435
Serhiy Storchaka279f4462019-09-14 12:24:05 +03002436 if (pi_factory == Py_None) {
Stefan Behnel43851a22019-05-01 21:20:38 +02002437 elementtreestate *st = ET_STATE_GLOBAL;
2438 pi_factory = st->pi_factory;
2439 }
2440 if (pi_factory) {
2441 Py_INCREF(pi_factory);
2442 Py_XSETREF(self->pi_factory, pi_factory);
2443 self->insert_pis = insert_pis;
2444 } else {
2445 Py_CLEAR(self->pi_factory);
2446 self->insert_pis = 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002447 }
2448
Eli Bendersky58d548d2012-05-29 15:45:16 +03002449 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002450}
2451
Eli Bendersky48d358b2012-05-30 17:57:50 +03002452static int
2453treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2454{
Stefan Behnel43851a22019-05-01 21:20:38 +02002455 Py_VISIT(self->pi_event_obj);
2456 Py_VISIT(self->comment_event_obj);
Serhiy Storchakad2a75c62018-12-18 22:29:14 +02002457 Py_VISIT(self->end_ns_event_obj);
2458 Py_VISIT(self->start_ns_event_obj);
2459 Py_VISIT(self->end_event_obj);
2460 Py_VISIT(self->start_event_obj);
2461 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002462 Py_VISIT(self->root);
2463 Py_VISIT(self->this);
2464 Py_VISIT(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002465 Py_VISIT(self->last_for_tail);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002466 Py_VISIT(self->data);
2467 Py_VISIT(self->stack);
Stefan Behnel43851a22019-05-01 21:20:38 +02002468 Py_VISIT(self->pi_factory);
2469 Py_VISIT(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002470 Py_VISIT(self->element_factory);
2471 return 0;
2472}
2473
2474static int
2475treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476{
Stefan Behnel43851a22019-05-01 21:20:38 +02002477 Py_CLEAR(self->pi_event_obj);
2478 Py_CLEAR(self->comment_event_obj);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002479 Py_CLEAR(self->end_ns_event_obj);
2480 Py_CLEAR(self->start_ns_event_obj);
2481 Py_CLEAR(self->end_event_obj);
2482 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002483 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002484 Py_CLEAR(self->stack);
2485 Py_CLEAR(self->data);
2486 Py_CLEAR(self->last);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002487 Py_CLEAR(self->last_for_tail);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002488 Py_CLEAR(self->this);
Stefan Behnel43851a22019-05-01 21:20:38 +02002489 Py_CLEAR(self->pi_factory);
2490 Py_CLEAR(self->comment_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002491 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002492 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002493 return 0;
2494}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495
Eli Bendersky48d358b2012-05-30 17:57:50 +03002496static void
2497treebuilder_dealloc(TreeBuilderObject *self)
2498{
2499 PyObject_GC_UnTrack(self);
2500 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002501 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502}
2503
2504/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002505/* helpers for handling of arbitrary element-like objects */
2506
Stefan Behnel43851a22019-05-01 21:20:38 +02002507/*[clinic input]
2508_elementtree._set_factories
2509
2510 comment_factory: object
2511 pi_factory: object
2512 /
2513
2514Change the factories used to create comments and processing instructions.
2515
2516For internal use only.
2517[clinic start generated code]*/
2518
2519static PyObject *
2520_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2521 PyObject *pi_factory)
2522/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2523{
2524 elementtreestate *st = ET_STATE_GLOBAL;
2525 PyObject *old;
2526
2527 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2528 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2529 Py_TYPE(comment_factory)->tp_name);
2530 return NULL;
2531 }
2532 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2533 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2534 Py_TYPE(pi_factory)->tp_name);
2535 return NULL;
2536 }
2537
2538 old = PyTuple_Pack(2,
2539 st->comment_factory ? st->comment_factory : Py_None,
2540 st->pi_factory ? st->pi_factory : Py_None);
2541
2542 if (comment_factory == Py_None) {
2543 Py_CLEAR(st->comment_factory);
2544 } else {
2545 Py_INCREF(comment_factory);
2546 Py_XSETREF(st->comment_factory, comment_factory);
2547 }
2548 if (pi_factory == Py_None) {
2549 Py_CLEAR(st->pi_factory);
2550 } else {
2551 Py_INCREF(pi_factory);
2552 Py_XSETREF(st->pi_factory, pi_factory);
2553 }
2554
2555 return old;
2556}
2557
Antoine Pitrouee329312012-10-04 19:53:29 +02002558static int
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002559treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2560 PyObject **dest, _Py_Identifier *name)
Antoine Pitrouee329312012-10-04 19:53:29 +02002561{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002562 /* Fast paths for the "almost always" cases. */
Antoine Pitrouee329312012-10-04 19:53:29 +02002563 if (Element_CheckExact(element)) {
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002564 PyObject *dest_obj = JOIN_OBJ(*dest);
2565 if (dest_obj == Py_None) {
2566 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2567 *data = NULL;
2568 Py_DECREF(dest_obj);
2569 return 0;
2570 }
2571 else if (JOIN_GET(*dest)) {
2572 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2573 return -1;
2574 }
2575 Py_CLEAR(*data);
2576 return 0;
2577 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002578 }
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002579
2580 /* Fallback for the non-Element / non-trivial cases. */
2581 {
Antoine Pitrouee329312012-10-04 19:53:29 +02002582 int r;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002583 PyObject* joined;
2584 PyObject* previous = _PyObject_GetAttrId(element, name);
2585 if (!previous)
Antoine Pitrouee329312012-10-04 19:53:29 +02002586 return -1;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002587 joined = list_join(*data);
2588 if (!joined) {
2589 Py_DECREF(previous);
2590 return -1;
2591 }
2592 if (previous != Py_None) {
2593 PyObject *tmp = PyNumber_Add(previous, joined);
2594 Py_DECREF(joined);
2595 Py_DECREF(previous);
2596 if (!tmp)
2597 return -1;
2598 joined = tmp;
2599 } else {
2600 Py_DECREF(previous);
2601 }
2602
Antoine Pitrouee329312012-10-04 19:53:29 +02002603 r = _PyObject_SetAttrId(element, name, joined);
2604 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002605 if (r < 0)
2606 return -1;
2607 Py_CLEAR(*data);
2608 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002609 }
2610}
2611
Serhiy Storchaka576def02017-03-30 09:47:31 +03002612LOCAL(int)
2613treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002614{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002615 if (!self->data) {
2616 return 0;
2617 }
2618
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002619 if (!self->last_for_tail) {
2620 PyObject *element = self->last;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002621 _Py_IDENTIFIER(text);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002622 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002623 element, &self->data,
2624 &((ElementObject *) element)->text, &PyId_text);
2625 }
2626 else {
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002627 PyObject *element = self->last_for_tail;
Serhiy Storchaka576def02017-03-30 09:47:31 +03002628 _Py_IDENTIFIER(tail);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002629 return treebuilder_extend_element_text_or_tail(
Serhiy Storchaka576def02017-03-30 09:47:31 +03002630 element, &self->data,
2631 &((ElementObject *) element)->tail, &PyId_tail);
2632 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002633}
2634
2635static int
2636treebuilder_add_subelement(PyObject *element, PyObject *child)
2637{
2638 _Py_IDENTIFIER(append);
2639 if (Element_CheckExact(element)) {
2640 ElementObject *elem = (ElementObject *) element;
2641 return element_add_subelement(elem, child);
2642 }
2643 else {
2644 PyObject *res;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002645 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
Antoine Pitrouee329312012-10-04 19:53:29 +02002646 if (res == NULL)
2647 return -1;
2648 Py_DECREF(res);
2649 return 0;
2650 }
2651}
2652
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002653LOCAL(int)
2654treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2655 PyObject *node)
2656{
2657 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002658 PyObject *res;
2659 PyObject *event = PyTuple_Pack(2, action, node);
2660 if (event == NULL)
2661 return -1;
Petr Viktorinffd97532020-02-11 17:46:57 +01002662 res = PyObject_CallOneArg(self->events_append, event);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002663 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002664 if (res == NULL)
2665 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002666 Py_DECREF(res);
2667 }
2668 return 0;
2669}
2670
Antoine Pitrouee329312012-10-04 19:53:29 +02002671/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002672/* handlers */
2673
2674LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2676 PyObject* attrib)
2677{
2678 PyObject* node;
2679 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002680 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681
Serhiy Storchaka576def02017-03-30 09:47:31 +03002682 if (treebuilder_flush_data(self) < 0) {
2683 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684 }
2685
Stefan Behnel43851a22019-05-01 21:20:38 +02002686 if (!self->element_factory) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002687 node = create_new_element(tag, attrib);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02002688 } else if (attrib == NULL) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002689 attrib = PyDict_New();
2690 if (!attrib)
2691 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002692 node = PyObject_CallFunctionObjArgs(self->element_factory,
2693 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002694 Py_DECREF(attrib);
2695 }
2696 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002697 node = PyObject_CallFunctionObjArgs(self->element_factory,
2698 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002699 }
2700 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002702 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703
Antoine Pitrouee329312012-10-04 19:53:29 +02002704 this = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002705 Py_CLEAR(self->last_for_tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
2707 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002708 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002709 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710 } else {
2711 if (self->root) {
2712 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002713 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714 "multiple elements on top level"
2715 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002716 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 }
2718 Py_INCREF(node);
2719 self->root = node;
2720 }
2721
2722 if (self->index < PyList_GET_SIZE(self->stack)) {
2723 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002724 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725 Py_INCREF(this);
2726 } else {
2727 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002728 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729 }
2730 self->index++;
2731
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002733 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002735 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002737 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2738 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739
2740 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002741
2742 error:
2743 Py_DECREF(node);
2744 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745}
2746
2747LOCAL(PyObject*)
2748treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2749{
2750 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002751 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002752 /* ignore calls to data before the first call to start */
2753 Py_RETURN_NONE;
2754 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 /* store the first item as is */
2756 Py_INCREF(data); self->data = data;
2757 } else {
2758 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002759 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2760 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002761 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 /* expat often generates single character data sections; handle
2763 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002764 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2765 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002767 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 } else if (PyList_CheckExact(self->data)) {
2769 if (PyList_Append(self->data, data) < 0)
2770 return NULL;
2771 } else {
2772 PyObject* list = PyList_New(2);
2773 if (!list)
2774 return NULL;
2775 PyList_SET_ITEM(list, 0, self->data);
2776 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2777 self->data = list;
2778 }
2779 }
2780
2781 Py_RETURN_NONE;
2782}
2783
2784LOCAL(PyObject*)
2785treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2786{
2787 PyObject* item;
2788
Serhiy Storchaka576def02017-03-30 09:47:31 +03002789 if (treebuilder_flush_data(self) < 0) {
2790 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791 }
2792
2793 if (self->index == 0) {
2794 PyErr_SetString(
2795 PyExc_IndexError,
2796 "pop from empty stack"
2797 );
2798 return NULL;
2799 }
2800
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002801 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002802 self->last = self->this;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002803 Py_INCREF(self->last);
2804 Py_XSETREF(self->last_for_tail, self->last);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002805 self->index--;
2806 self->this = PyList_GET_ITEM(self->stack, self->index);
2807 Py_INCREF(self->this);
2808 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002810 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2811 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812
2813 Py_INCREF(self->last);
2814 return (PyObject*) self->last;
2815}
2816
Stefan Behnel43851a22019-05-01 21:20:38 +02002817LOCAL(PyObject*)
2818treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2819{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002820 PyObject* comment;
Stefan Behnel43851a22019-05-01 21:20:38 +02002821 PyObject* this;
2822
2823 if (treebuilder_flush_data(self) < 0) {
2824 return NULL;
2825 }
2826
2827 if (self->comment_factory) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002828 comment = PyObject_CallOneArg(self->comment_factory, text);
Stefan Behnel43851a22019-05-01 21:20:38 +02002829 if (!comment)
2830 return NULL;
2831
2832 this = self->this;
2833 if (self->insert_comments && this != Py_None) {
2834 if (treebuilder_add_subelement(this, comment) < 0)
2835 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002836 Py_INCREF(comment);
2837 Py_XSETREF(self->last_for_tail, comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02002838 }
2839 } else {
2840 Py_INCREF(text);
2841 comment = text;
2842 }
2843
2844 if (self->events_append && self->comment_event_obj) {
2845 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2846 goto error;
2847 }
2848
2849 return comment;
2850
2851 error:
2852 Py_DECREF(comment);
2853 return NULL;
2854}
2855
2856LOCAL(PyObject*)
2857treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2858{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002859 PyObject* pi;
Stefan Behnel43851a22019-05-01 21:20:38 +02002860 PyObject* this;
2861 PyObject* stack[2] = {target, text};
2862
2863 if (treebuilder_flush_data(self) < 0) {
2864 return NULL;
2865 }
2866
2867 if (self->pi_factory) {
2868 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2869 if (!pi) {
2870 return NULL;
2871 }
2872
2873 this = self->this;
2874 if (self->insert_pis && this != Py_None) {
2875 if (treebuilder_add_subelement(this, pi) < 0)
2876 goto error;
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02002877 Py_INCREF(pi);
2878 Py_XSETREF(self->last_for_tail, pi);
Stefan Behnel43851a22019-05-01 21:20:38 +02002879 }
2880 } else {
2881 pi = PyTuple_Pack(2, target, text);
2882 if (!pi) {
2883 return NULL;
2884 }
2885 }
2886
2887 if (self->events_append && self->pi_event_obj) {
2888 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2889 goto error;
2890 }
2891
2892 return pi;
2893
2894 error:
2895 Py_DECREF(pi);
2896 return NULL;
2897}
2898
Stefan Behneldde3eeb2019-05-01 21:49:58 +02002899LOCAL(PyObject*)
2900treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2901{
2902 PyObject* parcel;
2903
2904 if (self->events_append && self->start_ns_event_obj) {
2905 parcel = PyTuple_Pack(2, prefix, uri);
2906 if (!parcel) {
2907 return NULL;
2908 }
2909
2910 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2911 Py_DECREF(parcel);
2912 return NULL;
2913 }
2914 Py_DECREF(parcel);
2915 }
2916
2917 Py_RETURN_NONE;
2918}
2919
2920LOCAL(PyObject*)
2921treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2922{
2923 if (self->events_append && self->end_ns_event_obj) {
2924 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2925 return NULL;
2926 }
2927 }
2928
2929 Py_RETURN_NONE;
2930}
2931
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932/* -------------------------------------------------------------------- */
2933/* methods (in alphabetical order) */
2934
Serhiy Storchakacb985562015-05-04 15:32:48 +03002935/*[clinic input]
2936_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937
Serhiy Storchakacb985562015-05-04 15:32:48 +03002938 data: object
2939 /
2940
2941[clinic start generated code]*/
2942
2943static PyObject *
2944_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2945/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2946{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947 return treebuilder_handle_data(self, data);
2948}
2949
Serhiy Storchakacb985562015-05-04 15:32:48 +03002950/*[clinic input]
2951_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952
Serhiy Storchakacb985562015-05-04 15:32:48 +03002953 tag: object
2954 /
2955
2956[clinic start generated code]*/
2957
2958static PyObject *
2959_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2960/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2961{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 return treebuilder_handle_end(self, tag);
2963}
2964
Stefan Behnel43851a22019-05-01 21:20:38 +02002965/*[clinic input]
2966_elementtree.TreeBuilder.comment
2967
2968 text: object
2969 /
2970
2971[clinic start generated code]*/
2972
2973static PyObject *
2974_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2975/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2976{
2977 return treebuilder_handle_comment(self, text);
2978}
2979
2980/*[clinic input]
2981_elementtree.TreeBuilder.pi
2982
2983 target: object
2984 text: object = None
2985 /
2986
2987[clinic start generated code]*/
2988
2989static PyObject *
2990_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
2991 PyObject *text)
2992/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
2993{
2994 return treebuilder_handle_pi(self, target, text);
2995}
2996
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002997LOCAL(PyObject*)
2998treebuilder_done(TreeBuilderObject* self)
2999{
3000 PyObject* res;
3001
3002 /* FIXME: check stack size? */
3003
3004 if (self->root)
3005 res = self->root;
3006 else
3007 res = Py_None;
3008
3009 Py_INCREF(res);
3010 return res;
3011}
3012
Serhiy Storchakacb985562015-05-04 15:32:48 +03003013/*[clinic input]
3014_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003015
Serhiy Storchakacb985562015-05-04 15:32:48 +03003016[clinic start generated code]*/
3017
3018static PyObject *
3019_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3020/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3021{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 return treebuilder_done(self);
3023}
3024
Serhiy Storchakacb985562015-05-04 15:32:48 +03003025/*[clinic input]
3026_elementtree.TreeBuilder.start
3027
3028 tag: object
Shantanu4edc95c2020-03-01 22:33:24 -08003029 attrs: object(subclass_of='&PyDict_Type')
Serhiy Storchakacb985562015-05-04 15:32:48 +03003030 /
3031
3032[clinic start generated code]*/
3033
3034static PyObject *
3035_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3036 PyObject *attrs)
Shantanu4edc95c2020-03-01 22:33:24 -08003037/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003038{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003039 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003040}
3041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042/* ==================================================================== */
3043/* the expat interface */
3044
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003046#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07003047
3048/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3049 * cached globally without being in per-module state.
3050 */
Eli Bendersky20d41742012-06-01 09:48:37 +03003051static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003053
Eli Bendersky52467b12012-06-01 07:13:08 +03003054static XML_Memory_Handling_Suite ExpatMemoryHandler = {
3055 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3056
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003057typedef struct {
3058 PyObject_HEAD
3059
3060 XML_Parser parser;
3061
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003062 PyObject *target;
3063 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003065 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003067 PyObject *handle_start_ns;
3068 PyObject *handle_end_ns;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003069 PyObject *handle_start;
3070 PyObject *handle_data;
3071 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003073 PyObject *handle_comment;
3074 PyObject *handle_pi;
3075 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003077 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079} XMLParserObject;
3080
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003081/* helpers */
3082
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083LOCAL(PyObject*)
3084makeuniversal(XMLParserObject* self, const char* string)
3085{
3086 /* convert a UTF-8 tag/attribute name from the expat parser
3087 to a universal name string */
3088
Antoine Pitrouc1948842012-10-01 23:40:37 +02003089 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090 PyObject* key;
3091 PyObject* value;
3092
3093 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00003094 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003095 if (!key)
3096 return NULL;
3097
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003098 value = PyDict_GetItemWithError(self->names, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099
3100 if (value) {
3101 Py_INCREF(value);
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003102 }
3103 else if (!PyErr_Occurred()) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 /* new name. convert to universal name, and decode as
3105 necessary */
3106
3107 PyObject* tag;
3108 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02003109 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003110
3111 /* look for namespace separator */
3112 for (i = 0; i < size; i++)
3113 if (string[i] == '}')
3114 break;
3115 if (i != size) {
3116 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003117 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02003118 if (tag == NULL) {
3119 Py_DECREF(key);
3120 return NULL;
3121 }
Christian Heimes72b710a2008-05-26 13:28:38 +00003122 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003123 p[0] = '{';
3124 memcpy(p+1, string, size);
3125 size++;
3126 } else {
3127 /* plain name; use key as tag */
3128 Py_INCREF(key);
3129 tag = key;
3130 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003131
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00003133 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00003134 value = PyUnicode_DecodeUTF8(p, size, "strict");
3135 Py_DECREF(tag);
3136 if (!value) {
3137 Py_DECREF(key);
3138 return NULL;
3139 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003140
3141 /* add to names dictionary */
3142 if (PyDict_SetItem(self->names, key, value) < 0) {
3143 Py_DECREF(key);
3144 Py_DECREF(value);
3145 return NULL;
3146 }
3147 }
3148
3149 Py_DECREF(key);
3150 return value;
3151}
3152
Eli Bendersky5b77d812012-03-16 08:20:05 +02003153/* Set the ParseError exception with the given parameters.
3154 * If message is not NULL, it's used as the error string. Otherwise, the
3155 * message string is the default for the given error_code.
3156*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003157static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003158expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3159 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003160{
Eli Bendersky5b77d812012-03-16 08:20:05 +02003161 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003162 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003163
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003164 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02003165 message ? message : EXPAT(ErrorString)(error_code),
3166 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003167 if (errmsg == NULL)
3168 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003169
Petr Viktorinffd97532020-02-11 17:46:57 +01003170 error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01003171 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003172 if (!error)
3173 return;
3174
Eli Bendersky5b77d812012-03-16 08:20:05 +02003175 /* Add code and position attributes */
3176 code = PyLong_FromLong((long)error_code);
3177 if (!code) {
3178 Py_DECREF(error);
3179 return;
3180 }
3181 if (PyObject_SetAttrString(error, "code", code) == -1) {
3182 Py_DECREF(error);
3183 Py_DECREF(code);
3184 return;
3185 }
3186 Py_DECREF(code);
3187
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003188 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003189 if (!position) {
3190 Py_DECREF(error);
3191 return;
3192 }
3193 if (PyObject_SetAttrString(error, "position", position) == -1) {
3194 Py_DECREF(error);
3195 Py_DECREF(position);
3196 return;
3197 }
3198 Py_DECREF(position);
3199
Eli Bendersky532d03e2013-08-10 08:00:39 -07003200 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003201 Py_DECREF(error);
3202}
3203
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204/* -------------------------------------------------------------------- */
3205/* handlers */
3206
3207static void
3208expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3209 int data_len)
3210{
3211 PyObject* key;
3212 PyObject* value;
3213 PyObject* res;
3214
3215 if (data_len < 2 || data_in[0] != '&')
3216 return;
3217
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003218 if (PyErr_Occurred())
3219 return;
3220
Neal Norwitz0269b912007-08-08 06:56:02 +00003221 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003222 if (!key)
3223 return;
3224
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02003225 value = PyDict_GetItemWithError(self->entity, key);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226
3227 if (value) {
3228 if (TreeBuilder_CheckExact(self->target))
3229 res = treebuilder_handle_data(
3230 (TreeBuilderObject*) self->target, value
3231 );
3232 else if (self->handle_data)
Petr Viktorinffd97532020-02-11 17:46:57 +01003233 res = PyObject_CallOneArg(self->handle_data, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 else
3235 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003237 } else if (!PyErr_Occurred()) {
3238 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00003239 char message[128] = "undefined entity ";
3240 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003241 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003242 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003244 EXPAT(GetErrorColumnNumber)(self->parser),
3245 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246 );
3247 }
3248
3249 Py_DECREF(key);
3250}
3251
3252static void
3253expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3254 const XML_Char **attrib_in)
3255{
3256 PyObject* res;
3257 PyObject* tag;
3258 PyObject* attrib;
3259 int ok;
3260
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003261 if (PyErr_Occurred())
3262 return;
3263
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264 /* tag name */
3265 tag = makeuniversal(self, tag_in);
3266 if (!tag)
3267 return; /* parser will look for errors */
3268
3269 /* attributes */
3270 if (attrib_in[0]) {
3271 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003272 if (!attrib) {
3273 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003275 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 while (attrib_in[0] && attrib_in[1]) {
3277 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003278 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 if (!key || !value) {
3280 Py_XDECREF(value);
3281 Py_XDECREF(key);
3282 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003283 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 return;
3285 }
3286 ok = PyDict_SetItem(attrib, key, value);
3287 Py_DECREF(value);
3288 Py_DECREF(key);
3289 if (ok < 0) {
3290 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003291 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 return;
3293 }
3294 attrib_in += 2;
3295 }
3296 } else {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003297 attrib = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003298 }
3299
3300 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 /* shortcut */
3302 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3303 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003304 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003305 else if (self->handle_start) {
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003306 if (attrib == NULL) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003307 attrib = PyDict_New();
3308 if (!attrib) {
3309 Py_DECREF(tag);
3310 return;
3311 }
3312 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003313 res = PyObject_CallFunctionObjArgs(self->handle_start,
3314 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003315 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003316 res = NULL;
3317
3318 Py_DECREF(tag);
Serhiy Storchakadccd41e2020-03-09 15:12:41 +02003319 Py_XDECREF(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320
3321 Py_XDECREF(res);
3322}
3323
3324static void
3325expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3326 int data_len)
3327{
3328 PyObject* data;
3329 PyObject* res;
3330
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003331 if (PyErr_Occurred())
3332 return;
3333
Neal Norwitz0269b912007-08-08 06:56:02 +00003334 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003335 if (!data)
3336 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337
3338 if (TreeBuilder_CheckExact(self->target))
3339 /* shortcut */
3340 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3341 else if (self->handle_data)
Petr Viktorinffd97532020-02-11 17:46:57 +01003342 res = PyObject_CallOneArg(self->handle_data, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343 else
3344 res = NULL;
3345
3346 Py_DECREF(data);
3347
3348 Py_XDECREF(res);
3349}
3350
3351static void
3352expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3353{
3354 PyObject* tag;
3355 PyObject* res = NULL;
3356
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003357 if (PyErr_Occurred())
3358 return;
3359
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360 if (TreeBuilder_CheckExact(self->target))
3361 /* shortcut */
3362 /* the standard tree builder doesn't look at the end tag */
3363 res = treebuilder_handle_end(
3364 (TreeBuilderObject*) self->target, Py_None
3365 );
3366 else if (self->handle_end) {
3367 tag = makeuniversal(self, tag_in);
3368 if (tag) {
Petr Viktorinffd97532020-02-11 17:46:57 +01003369 res = PyObject_CallOneArg(self->handle_end, tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 Py_DECREF(tag);
3371 }
3372 }
3373
3374 Py_XDECREF(res);
3375}
3376
3377static void
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003378expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3379 const XML_Char *uri_in)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003381 PyObject* res = NULL;
3382 PyObject* uri;
3383 PyObject* prefix;
3384 PyObject* stack[2];
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003385
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003386 if (PyErr_Occurred())
3387 return;
3388
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003389 if (!uri_in)
3390 uri_in = "";
3391 if (!prefix_in)
3392 prefix_in = "";
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003393
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003394 if (TreeBuilder_CheckExact(self->target)) {
3395 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3396 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003397
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003398 if (target->events_append && target->start_ns_event_obj) {
3399 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3400 if (!prefix)
3401 return;
3402 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3403 if (!uri) {
3404 Py_DECREF(prefix);
3405 return;
3406 }
3407
3408 res = treebuilder_handle_start_ns(target, prefix, uri);
3409 Py_DECREF(uri);
3410 Py_DECREF(prefix);
3411 }
3412 } else if (self->handle_start_ns) {
3413 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3414 if (!prefix)
3415 return;
3416 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3417 if (!uri) {
3418 Py_DECREF(prefix);
3419 return;
3420 }
3421
3422 stack[0] = prefix;
3423 stack[1] = uri;
3424 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3425 Py_DECREF(uri);
3426 Py_DECREF(prefix);
3427 }
3428
3429 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003430}
3431
3432static void
3433expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3434{
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003435 PyObject *res = NULL;
3436 PyObject* prefix;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003437
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003438 if (PyErr_Occurred())
3439 return;
3440
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003441 if (!prefix_in)
3442 prefix_in = "";
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003443
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003444 if (TreeBuilder_CheckExact(self->target)) {
3445 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3446 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3447
3448 if (target->events_append && target->end_ns_event_obj) {
3449 res = treebuilder_handle_end_ns(target, Py_None);
3450 }
3451 } else if (self->handle_end_ns) {
3452 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3453 if (!prefix)
3454 return;
3455
Petr Viktorinffd97532020-02-11 17:46:57 +01003456 res = PyObject_CallOneArg(self->handle_end_ns, prefix);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003457 Py_DECREF(prefix);
3458 }
3459
3460 Py_XDECREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461}
3462
3463static void
3464expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3465{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003466 PyObject* comment;
3467 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003469 if (PyErr_Occurred())
3470 return;
3471
Stefan Behnel43851a22019-05-01 21:20:38 +02003472 if (TreeBuilder_CheckExact(self->target)) {
3473 /* shortcut */
3474 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3475
Neal Norwitz0269b912007-08-08 06:56:02 +00003476 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Stefan Behnel43851a22019-05-01 21:20:38 +02003477 if (!comment)
3478 return; /* parser will look for errors */
3479
3480 res = treebuilder_handle_comment(target, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003481 Py_XDECREF(res);
3482 Py_DECREF(comment);
Stefan Behnel43851a22019-05-01 21:20:38 +02003483 } else if (self->handle_comment) {
3484 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3485 if (!comment)
3486 return;
3487
Petr Viktorinffd97532020-02-11 17:46:57 +01003488 res = PyObject_CallOneArg(self->handle_comment, comment);
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003489 Py_XDECREF(res);
3490 Py_DECREF(comment);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003491 }
3492}
3493
Eli Bendersky45839902013-01-13 05:14:47 -08003494static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003495expat_start_doctype_handler(XMLParserObject *self,
3496 const XML_Char *doctype_name,
3497 const XML_Char *sysid,
3498 const XML_Char *pubid,
3499 int has_internal_subset)
3500{
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003501 _Py_IDENTIFIER(doctype);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003502 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003503 PyObject *res;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003504
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003505 if (PyErr_Occurred())
3506 return;
3507
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003508 doctype_name_obj = makeuniversal(self, doctype_name);
3509 if (!doctype_name_obj)
3510 return;
3511
3512 if (sysid) {
3513 sysid_obj = makeuniversal(self, sysid);
3514 if (!sysid_obj) {
3515 Py_DECREF(doctype_name_obj);
3516 return;
3517 }
3518 } else {
3519 Py_INCREF(Py_None);
3520 sysid_obj = Py_None;
3521 }
3522
3523 if (pubid) {
3524 pubid_obj = makeuniversal(self, pubid);
3525 if (!pubid_obj) {
3526 Py_DECREF(doctype_name_obj);
3527 Py_DECREF(sysid_obj);
3528 return;
3529 }
3530 } else {
3531 Py_INCREF(Py_None);
3532 pubid_obj = Py_None;
3533 }
3534
3535 /* If the target has a handler for doctype, call it. */
3536 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003537 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3538 doctype_name_obj, pubid_obj,
3539 sysid_obj, NULL);
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003540 Py_XDECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003541 }
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003542 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3543 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3544 "The doctype() method of XMLParser is ignored. "
3545 "Define doctype() method on the TreeBuilder target.",
3546 1);
Serhiy Storchakaee98e7b2018-07-25 14:52:45 +03003547 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003548 }
3549
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003550 Py_DECREF(doctype_name_obj);
3551 Py_DECREF(pubid_obj);
3552 Py_DECREF(sysid_obj);
3553}
3554
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555static void
3556expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3557 const XML_Char* data_in)
3558{
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003559 PyObject* pi_target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003560 PyObject* data;
3561 PyObject* res;
Stefan Behnel43851a22019-05-01 21:20:38 +02003562 PyObject* stack[2];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003563
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003564 if (PyErr_Occurred())
3565 return;
3566
Stefan Behnel43851a22019-05-01 21:20:38 +02003567 if (TreeBuilder_CheckExact(self->target)) {
3568 /* shortcut */
3569 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3570
Stefan Behnelc6cb4cd2019-07-24 20:08:02 +02003571 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
Stefan Behnel43851a22019-05-01 21:20:38 +02003572 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3573 if (!pi_target)
3574 goto error;
3575 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3576 if (!data)
3577 goto error;
3578 res = treebuilder_handle_pi(target, pi_target, data);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579 Py_XDECREF(res);
3580 Py_DECREF(data);
Stefan Behnel43851a22019-05-01 21:20:38 +02003581 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003583 } else if (self->handle_pi) {
3584 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3585 if (!pi_target)
3586 goto error;
3587 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3588 if (!data)
3589 goto error;
3590
3591 stack[0] = pi_target;
3592 stack[1] = data;
3593 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3594 Py_XDECREF(res);
3595 Py_DECREF(data);
3596 Py_DECREF(pi_target);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003597 }
Stefan Behnel43851a22019-05-01 21:20:38 +02003598
3599 return;
3600
3601 error:
3602 Py_XDECREF(pi_target);
3603 return;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604}
3605
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607
Eli Bendersky52467b12012-06-01 07:13:08 +03003608static PyObject *
3609xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610{
Eli Bendersky52467b12012-06-01 07:13:08 +03003611 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3612 if (self) {
3613 self->parser = NULL;
3614 self->target = self->entity = self->names = NULL;
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003615 self->handle_start_ns = self->handle_end_ns = NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003616 self->handle_start = self->handle_data = self->handle_end = NULL;
3617 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003618 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003620 return (PyObject *)self;
3621}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003622
scoderc8d8e152017-09-14 22:00:03 +02003623static int
3624ignore_attribute_error(PyObject *value)
3625{
3626 if (value == NULL) {
3627 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3628 return -1;
3629 }
3630 PyErr_Clear();
3631 }
3632 return 0;
3633}
3634
Serhiy Storchakacb985562015-05-04 15:32:48 +03003635/*[clinic input]
3636_elementtree.XMLParser.__init__
3637
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003638 *
Serhiy Storchakacb985562015-05-04 15:32:48 +03003639 target: object = NULL
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003640 encoding: str(accept={str, NoneType}) = None
Serhiy Storchakacb985562015-05-04 15:32:48 +03003641
3642[clinic start generated code]*/
3643
Eli Bendersky52467b12012-06-01 07:13:08 +03003644static int
Serhiy Storchaka02ec92f2018-07-24 12:03:34 +03003645_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3646 const char *encoding)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03003647/*[clinic end generated code: output=3ae45ec6cdf344e4 input=53e35a829ae043e8]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003648{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003649 self->entity = PyDict_New();
3650 if (!self->entity)
3651 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003652
Serhiy Storchakacb985562015-05-04 15:32:48 +03003653 self->names = PyDict_New();
3654 if (!self->names) {
3655 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003656 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003658
Serhiy Storchakacb985562015-05-04 15:32:48 +03003659 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3660 if (!self->parser) {
3661 Py_CLEAR(self->entity);
3662 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003663 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003664 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665 }
Christian Heimescb5778f2018-09-18 14:38:58 +02003666 /* expat < 2.1.0 has no XML_SetHashSalt() */
3667 if (EXPAT(SetHashSalt) != NULL) {
3668 EXPAT(SetHashSalt)(self->parser,
3669 (unsigned long)_Py_HashSecret.expat.hashsalt);
3670 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671
Eli Bendersky52467b12012-06-01 07:13:08 +03003672 if (target) {
3673 Py_INCREF(target);
3674 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003675 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003676 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003677 Py_CLEAR(self->entity);
3678 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003679 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003680 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003681 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003682 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003684 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3685 if (ignore_attribute_error(self->handle_start_ns)) {
3686 return -1;
3687 }
3688 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3689 if (ignore_attribute_error(self->handle_end_ns)) {
3690 return -1;
3691 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003692 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003693 if (ignore_attribute_error(self->handle_start)) {
3694 return -1;
3695 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003696 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003697 if (ignore_attribute_error(self->handle_data)) {
3698 return -1;
3699 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003700 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003701 if (ignore_attribute_error(self->handle_end)) {
3702 return -1;
3703 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003704 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003705 if (ignore_attribute_error(self->handle_comment)) {
3706 return -1;
3707 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003708 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003709 if (ignore_attribute_error(self->handle_pi)) {
3710 return -1;
3711 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003712 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003713 if (ignore_attribute_error(self->handle_close)) {
3714 return -1;
3715 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003716 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003717 if (ignore_attribute_error(self->handle_doctype)) {
3718 return -1;
3719 }
Eli Bendersky45839902013-01-13 05:14:47 -08003720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003721 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003722 EXPAT(SetUserData)(self->parser, self);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003723 if (self->handle_start_ns || self->handle_end_ns)
3724 EXPAT(SetNamespaceDeclHandler)(
3725 self->parser,
3726 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3727 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3728 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003729 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003730 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003731 (XML_StartElementHandler) expat_start_handler,
3732 (XML_EndElementHandler) expat_end_handler
3733 );
3734 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003735 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003736 (XML_DefaultHandler) expat_default_handler
3737 );
3738 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003739 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003740 (XML_CharacterDataHandler) expat_data_handler
3741 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003742 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003743 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003744 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003745 (XML_CommentHandler) expat_comment_handler
3746 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003747 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003748 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003749 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003750 (XML_ProcessingInstructionHandler) expat_pi_handler
3751 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003752 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003753 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003754 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3755 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003756 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003757 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003758 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003759 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003760
Eli Bendersky52467b12012-06-01 07:13:08 +03003761 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762}
3763
Eli Bendersky52467b12012-06-01 07:13:08 +03003764static int
3765xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3766{
3767 Py_VISIT(self->handle_close);
3768 Py_VISIT(self->handle_pi);
3769 Py_VISIT(self->handle_comment);
3770 Py_VISIT(self->handle_end);
3771 Py_VISIT(self->handle_data);
3772 Py_VISIT(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003773 Py_VISIT(self->handle_start_ns);
3774 Py_VISIT(self->handle_end_ns);
3775 Py_VISIT(self->handle_doctype);
Eli Bendersky52467b12012-06-01 07:13:08 +03003776
3777 Py_VISIT(self->target);
3778 Py_VISIT(self->entity);
3779 Py_VISIT(self->names);
3780
3781 return 0;
3782}
3783
3784static int
3785xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003786{
Victor Stinnere727d412017-09-18 05:29:37 -07003787 if (self->parser != NULL) {
3788 XML_Parser parser = self->parser;
3789 self->parser = NULL;
3790 EXPAT(ParserFree)(parser);
3791 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003792
Antoine Pitrouc1948842012-10-01 23:40:37 +02003793 Py_CLEAR(self->handle_close);
3794 Py_CLEAR(self->handle_pi);
3795 Py_CLEAR(self->handle_comment);
3796 Py_CLEAR(self->handle_end);
3797 Py_CLEAR(self->handle_data);
3798 Py_CLEAR(self->handle_start);
Stefan Behneldde3eeb2019-05-01 21:49:58 +02003799 Py_CLEAR(self->handle_start_ns);
3800 Py_CLEAR(self->handle_end_ns);
Antoine Pitrouc1948842012-10-01 23:40:37 +02003801 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003802
Antoine Pitrouc1948842012-10-01 23:40:37 +02003803 Py_CLEAR(self->target);
3804 Py_CLEAR(self->entity);
3805 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003806
Eli Bendersky52467b12012-06-01 07:13:08 +03003807 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003808}
3809
Eli Bendersky52467b12012-06-01 07:13:08 +03003810static void
3811xmlparser_dealloc(XMLParserObject* self)
3812{
3813 PyObject_GC_UnTrack(self);
3814 xmlparser_gc_clear(self);
3815 Py_TYPE(self)->tp_free((PyObject *)self);
3816}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003817
Oren Milman402e1cd2020-04-12 17:36:41 +03003818Py_LOCAL_INLINE(int)
3819_check_xmlparser(XMLParserObject* self)
3820{
3821 if (self->target == NULL) {
3822 PyErr_SetString(PyExc_ValueError,
3823 "XMLParser.__init__() wasn't called");
3824 return 0;
3825 }
3826 return 1;
3827}
3828
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003829LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003830expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003831{
3832 int ok;
3833
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003834 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003835 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3836
3837 if (PyErr_Occurred())
3838 return NULL;
3839
3840 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003841 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003842 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003843 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003844 EXPAT(GetErrorColumnNumber)(self->parser),
3845 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003846 );
3847 return NULL;
3848 }
3849
3850 Py_RETURN_NONE;
3851}
3852
Serhiy Storchakacb985562015-05-04 15:32:48 +03003853/*[clinic input]
3854_elementtree.XMLParser.close
3855
3856[clinic start generated code]*/
3857
3858static PyObject *
3859_elementtree_XMLParser_close_impl(XMLParserObject *self)
3860/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003861{
3862 /* end feeding data to parser */
3863
3864 PyObject* res;
Oren Milman402e1cd2020-04-12 17:36:41 +03003865
3866 if (!_check_xmlparser(self)) {
3867 return NULL;
3868 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003869 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003870 if (!res)
3871 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003872
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003873 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003874 Py_DECREF(res);
3875 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003876 }
3877 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003878 Py_DECREF(res);
Victor Stinner2ff58a22019-06-17 14:27:23 +02003879 return PyObject_CallNoArgs(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003880 }
3881 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003882 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003883 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003884}
3885
Serhiy Storchakacb985562015-05-04 15:32:48 +03003886/*[clinic input]
3887_elementtree.XMLParser.feed
3888
3889 data: object
3890 /
3891
3892[clinic start generated code]*/
3893
3894static PyObject *
3895_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3896/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003897{
3898 /* feed data to parser */
3899
Oren Milman402e1cd2020-04-12 17:36:41 +03003900 if (!_check_xmlparser(self)) {
3901 return NULL;
3902 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003903 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003904 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003905 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3906 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003907 return NULL;
3908 if (data_len > INT_MAX) {
3909 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3910 return NULL;
3911 }
3912 /* Explicitly set UTF-8 encoding. Return code ignored. */
3913 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003914 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003915 }
3916 else {
3917 Py_buffer view;
3918 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003919 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003920 return NULL;
3921 if (view.len > INT_MAX) {
3922 PyBuffer_Release(&view);
3923 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3924 return NULL;
3925 }
3926 res = expat_parse(self, view.buf, (int)view.len, 0);
3927 PyBuffer_Release(&view);
3928 return res;
3929 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003930}
3931
Serhiy Storchakacb985562015-05-04 15:32:48 +03003932/*[clinic input]
3933_elementtree.XMLParser._parse_whole
3934
3935 file: object
3936 /
3937
3938[clinic start generated code]*/
3939
3940static PyObject *
3941_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3942/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003943{
Eli Benderskya3699232013-05-19 18:47:23 -07003944 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003945 PyObject* reader;
3946 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003947 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003948 PyObject* res;
3949
Oren Milman402e1cd2020-04-12 17:36:41 +03003950 if (!_check_xmlparser(self)) {
3951 return NULL;
3952 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003953 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003954 if (!reader)
3955 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003956
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003957 /* read from open file object */
3958 for (;;) {
3959
3960 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3961
3962 if (!buffer) {
3963 /* read failed (e.g. due to KeyboardInterrupt) */
3964 Py_DECREF(reader);
3965 return NULL;
3966 }
3967
Eli Benderskyf996e772012-03-16 05:53:30 +02003968 if (PyUnicode_CheckExact(buffer)) {
3969 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003970 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003971 Py_DECREF(buffer);
3972 break;
3973 }
3974 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003975 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003976 if (!temp) {
3977 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003978 Py_DECREF(reader);
3979 return NULL;
3980 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003981 buffer = temp;
3982 }
3983 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003984 Py_DECREF(buffer);
3985 break;
3986 }
3987
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003988 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3989 Py_DECREF(buffer);
3990 Py_DECREF(reader);
3991 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3992 return NULL;
3993 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003994 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003995 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003996 );
3997
3998 Py_DECREF(buffer);
3999
4000 if (!res) {
4001 Py_DECREF(reader);
4002 return NULL;
4003 }
4004 Py_DECREF(res);
4005
4006 }
4007
4008 Py_DECREF(reader);
4009
4010 res = expat_parse(self, "", 0, 1);
4011
4012 if (res && TreeBuilder_CheckExact(self->target)) {
4013 Py_DECREF(res);
4014 return treebuilder_done((TreeBuilderObject*) self->target);
4015 }
4016
4017 return res;
4018}
4019
/*[clinic input]
_elementtree.XMLParser._setevents

    events_queue: object
    events_to_report: object = None
    /

[clinic start generated code]*/
4028
4029static PyObject *
4030_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4031 PyObject *events_queue,
4032 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004033/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004034{
4035 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004036 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004037 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004038 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004039
Oren Milman402e1cd2020-04-12 17:36:41 +03004040 if (!_check_xmlparser(self)) {
4041 return NULL;
4042 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004043 if (!TreeBuilder_CheckExact(self->target)) {
4044 PyErr_SetString(
4045 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004046 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004047 "targets"
4048 );
4049 return NULL;
4050 }
4051
4052 target = (TreeBuilderObject*) self->target;
4053
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02004054 events_append = PyObject_GetAttrString(events_queue, "append");
4055 if (events_append == NULL)
4056 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03004057 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004058
4059 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02004060 Py_CLEAR(target->start_event_obj);
4061 Py_CLEAR(target->end_event_obj);
4062 Py_CLEAR(target->start_ns_event_obj);
4063 Py_CLEAR(target->end_ns_event_obj);
Stefan Behnel43851a22019-05-01 21:20:38 +02004064 Py_CLEAR(target->comment_event_obj);
4065 Py_CLEAR(target->pi_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004066
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004067 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004068 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004069 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004070 Py_RETURN_NONE;
4071 }
4072
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004073 if (!(events_seq = PySequence_Fast(events_to_report,
4074 "events must be a sequence"))) {
4075 return NULL;
4076 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004077
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03004078 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004079 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02004080 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004081 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004082 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004083 } else if (PyBytes_Check(event_name_obj)) {
4084 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004085 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004086 if (event_name == NULL) {
4087 Py_DECREF(events_seq);
4088 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4089 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004090 }
4091
4092 Py_INCREF(event_name_obj);
4093 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004094 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004095 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004096 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004097 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004098 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004099 EXPAT(SetNamespaceDeclHandler)(
4100 self->parser,
4101 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4102 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4103 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004104 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03004105 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004106 EXPAT(SetNamespaceDeclHandler)(
4107 self->parser,
4108 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4109 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4110 );
Stefan Behnel43851a22019-05-01 21:20:38 +02004111 } else if (strcmp(event_name, "comment") == 0) {
4112 Py_XSETREF(target->comment_event_obj, event_name_obj);
4113 EXPAT(SetCommentHandler)(
4114 self->parser,
4115 (XML_CommentHandler) expat_comment_handler
4116 );
4117 } else if (strcmp(event_name, "pi") == 0) {
4118 Py_XSETREF(target->pi_event_obj, event_name_obj);
4119 EXPAT(SetProcessingInstructionHandler)(
4120 self->parser,
4121 (XML_ProcessingInstructionHandler) expat_pi_handler
4122 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004123 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02004124 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004125 Py_DECREF(events_seq);
4126 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004127 return NULL;
4128 }
4129 }
4130
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07004131 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004132 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004133}
4134
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004135static PyMemberDef xmlparser_members[] = {
4136 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4137 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4138 {NULL}
4139};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004140
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004141static PyObject*
4142xmlparser_version_getter(XMLParserObject *self, void *closure)
4143{
4144 return PyUnicode_FromFormat(
4145 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4146 XML_MINOR_VERSION, XML_MICRO_VERSION);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004147}
4148
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004149static PyGetSetDef xmlparser_getsetlist[] = {
4150 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4151 {NULL},
4152};
4153
Serhiy Storchakacb985562015-05-04 15:32:48 +03004154#include "clinic/_elementtree.c.h"
4155
4156static PyMethodDef element_methods[] = {
4157
4158 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4159
4160 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4161 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4162
4163 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4164 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4165 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4166
4167 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4168 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4169 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4170 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4171
4172 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4173 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4174 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4175
Serhiy Storchakacb985562015-05-04 15:32:48 +03004176 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4177 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4178
4179 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4180
4181 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4182 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4183 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4184 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4185 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4186
4187 {NULL, NULL}
4188};
4189
4190static PyMappingMethods element_as_mapping = {
4191 (lenfunc) element_length,
4192 (binaryfunc) element_subscr,
4193 (objobjargproc) element_ass_subscr,
4194};
4195
Serhiy Storchakadde08152015-11-25 15:28:13 +02004196static PyGetSetDef element_getsetlist[] = {
4197 {"tag",
4198 (getter)element_tag_getter,
4199 (setter)element_tag_setter,
4200 "A string identifying what kind of data this element represents"},
4201 {"text",
4202 (getter)element_text_getter,
4203 (setter)element_text_setter,
4204 "A string of text directly after the start tag, or None"},
4205 {"tail",
4206 (getter)element_tail_getter,
4207 (setter)element_tail_setter,
4208 "A string of text directly after the end tag, or None"},
4209 {"attrib",
4210 (getter)element_attrib_getter,
4211 (setter)element_attrib_setter,
4212 "A dictionary containing the element's attributes"},
4213 {NULL},
4214};
4215
Serhiy Storchakacb985562015-05-04 15:32:48 +03004216static PyTypeObject Element_Type = {
4217 PyVarObject_HEAD_INIT(NULL, 0)
4218 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4219 /* methods */
4220 (destructor)element_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004221 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004222 0, /* tp_getattr */
4223 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004224 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004225 (reprfunc)element_repr, /* tp_repr */
4226 0, /* tp_as_number */
4227 &element_as_sequence, /* tp_as_sequence */
4228 &element_as_mapping, /* tp_as_mapping */
4229 0, /* tp_hash */
4230 0, /* tp_call */
4231 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004232 PyObject_GenericGetAttr, /* tp_getattro */
4233 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004234 0, /* tp_as_buffer */
4235 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4236 /* tp_flags */
4237 0, /* tp_doc */
4238 (traverseproc)element_gc_traverse, /* tp_traverse */
4239 (inquiry)element_gc_clear, /* tp_clear */
4240 0, /* tp_richcompare */
4241 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4242 0, /* tp_iter */
4243 0, /* tp_iternext */
4244 element_methods, /* tp_methods */
4245 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02004246 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004247 0, /* tp_base */
4248 0, /* tp_dict */
4249 0, /* tp_descr_get */
4250 0, /* tp_descr_set */
4251 0, /* tp_dictoffset */
4252 (initproc)element_init, /* tp_init */
4253 PyType_GenericAlloc, /* tp_alloc */
4254 element_new, /* tp_new */
4255 0, /* tp_free */
4256};
4257
4258static PyMethodDef treebuilder_methods[] = {
4259 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4260 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4261 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
Stefan Behnel43851a22019-05-01 21:20:38 +02004262 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4263 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004264 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4265 {NULL, NULL}
4266};
4267
4268static PyTypeObject TreeBuilder_Type = {
4269 PyVarObject_HEAD_INIT(NULL, 0)
4270 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4271 /* methods */
4272 (destructor)treebuilder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004273 0, /* tp_vectorcall_offset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004274 0, /* tp_getattr */
4275 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004276 0, /* tp_as_async */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004277 0, /* tp_repr */
4278 0, /* tp_as_number */
4279 0, /* tp_as_sequence */
4280 0, /* tp_as_mapping */
4281 0, /* tp_hash */
4282 0, /* tp_call */
4283 0, /* tp_str */
4284 0, /* tp_getattro */
4285 0, /* tp_setattro */
4286 0, /* tp_as_buffer */
4287 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4288 /* tp_flags */
4289 0, /* tp_doc */
4290 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4291 (inquiry)treebuilder_gc_clear, /* tp_clear */
4292 0, /* tp_richcompare */
4293 0, /* tp_weaklistoffset */
4294 0, /* tp_iter */
4295 0, /* tp_iternext */
4296 treebuilder_methods, /* tp_methods */
4297 0, /* tp_members */
4298 0, /* tp_getset */
4299 0, /* tp_base */
4300 0, /* tp_dict */
4301 0, /* tp_descr_get */
4302 0, /* tp_descr_set */
4303 0, /* tp_dictoffset */
4304 _elementtree_TreeBuilder___init__, /* tp_init */
4305 PyType_GenericAlloc, /* tp_alloc */
4306 treebuilder_new, /* tp_new */
4307 0, /* tp_free */
4308};
4309
4310static PyMethodDef xmlparser_methods[] = {
4311 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4312 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4313 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4314 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03004315 {NULL, NULL}
4316};
4317
Neal Norwitz227b5332006-03-22 09:28:35 +00004318static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00004319 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08004320 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004321 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03004322 (destructor)xmlparser_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004323 0, /* tp_vectorcall_offset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004324 0, /* tp_getattr */
4325 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004326 0, /* tp_as_async */
Eli Bendersky52467b12012-06-01 07:13:08 +03004327 0, /* tp_repr */
4328 0, /* tp_as_number */
4329 0, /* tp_as_sequence */
4330 0, /* tp_as_mapping */
4331 0, /* tp_hash */
4332 0, /* tp_call */
4333 0, /* tp_str */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004334 0, /* tp_getattro */
Eli Bendersky52467b12012-06-01 07:13:08 +03004335 0, /* tp_setattro */
4336 0, /* tp_as_buffer */
4337 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4338 /* tp_flags */
4339 0, /* tp_doc */
4340 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4341 (inquiry)xmlparser_gc_clear, /* tp_clear */
4342 0, /* tp_richcompare */
4343 0, /* tp_weaklistoffset */
4344 0, /* tp_iter */
4345 0, /* tp_iternext */
4346 xmlparser_methods, /* tp_methods */
Serhiy Storchakab2953fa2018-10-04 10:41:27 +03004347 xmlparser_members, /* tp_members */
4348 xmlparser_getsetlist, /* tp_getset */
Eli Bendersky52467b12012-06-01 07:13:08 +03004349 0, /* tp_base */
4350 0, /* tp_dict */
4351 0, /* tp_descr_get */
4352 0, /* tp_descr_set */
4353 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004354 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004355 PyType_GenericAlloc, /* tp_alloc */
4356 xmlparser_new, /* tp_new */
4357 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004358};
4359
/* ==================================================================== */
/* python module interface */
4362
4363static PyMethodDef _functions[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02004364 {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
Stefan Behnel43851a22019-05-01 21:20:38 +02004365 _ELEMENTTREE__SET_FACTORIES_METHODDEF
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004366 {NULL, NULL}
4367};
4368
Martin v. Löwis1a214512008-06-11 05:26:20 +00004369
Eli Bendersky532d03e2013-08-10 08:00:39 -07004370static struct PyModuleDef elementtreemodule = {
4371 PyModuleDef_HEAD_INIT,
4372 "_elementtree",
4373 NULL,
4374 sizeof(elementtreestate),
4375 _functions,
4376 NULL,
4377 elementtree_traverse,
4378 elementtree_clear,
4379 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004380};
4381
Neal Norwitzf6657e62006-12-28 04:47:50 +00004382PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004383PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004384{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004385 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004386 elementtreestate *st;
4387
4388 m = PyState_FindModule(&elementtreemodule);
4389 if (m) {
4390 Py_INCREF(m);
4391 return m;
4392 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004393
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004394 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004395 if (PyType_Ready(&ElementIter_Type) < 0)
4396 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004397 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004398 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004399 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004400 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004401 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004402 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004403
Eli Bendersky532d03e2013-08-10 08:00:39 -07004404 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004405 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004406 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08004407 st = get_elementtree_state(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004408
Eli Bendersky828efde2012-04-05 05:40:58 +03004409 if (!(temp = PyImport_ImportModule("copy")))
4410 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004411 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004412 Py_XDECREF(temp);
4413
Victor Stinnerb136f112017-07-10 22:28:02 +02004414 if (st->deepcopy_obj == NULL) {
4415 return NULL;
4416 }
4417
4418 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004419 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004420 return NULL;
4421
Eli Bendersky20d41742012-06-01 09:48:37 +03004422 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004423 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4424 if (expat_capi) {
4425 /* check that it's usable */
4426 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004427 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004428 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4429 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004430 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004431 PyErr_SetString(PyExc_ImportError,
4432 "pyexpat version is incompatible");
4433 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004434 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004435 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004436 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004437 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004438
Eli Bendersky532d03e2013-08-10 08:00:39 -07004439 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004440 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004441 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004442 Py_INCREF(st->parseerror_obj);
Dong-hee Na016bdd52020-03-29 23:12:11 +09004443 if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4444 Py_DECREF(st->parseerror_obj);
4445 return NULL;
4446 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004447
Dong-hee Na016bdd52020-03-29 23:12:11 +09004448 PyTypeObject *types[] = {
4449 &Element_Type,
4450 &TreeBuilder_Type,
4451 &XMLParser_Type
4452 };
Eli Bendersky092af1f2012-03-04 07:14:03 +02004453
Dong-hee Na016bdd52020-03-29 23:12:11 +09004454 for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4455 if (PyModule_AddType(m, types[i]) < 0) {
4456 return NULL;
4457 }
4458 }
Eli Bendersky52467b12012-06-01 07:13:08 +03004459
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004460 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004461}