blob: 1b8f81234df46af513d934b1854c36fa0f7f808b [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots stored inline in an element's extra block; up to
   this many children need no separate children buffer. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  Change the condition below to 1 to keep a
   running byte counter that is printed on every ALLOC/RELEASE; otherwise
   both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* Declaration prefix for file-local helpers: an inlined __fastcall function
   under MSVC, a plain static function with every other compiler. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* Helpers for the 'join' flag that is kept in the lowest bit of the text
   and tail object pointers.  Every use of text or tail as an object pointer
   must go through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)        the object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the pointer with its flag bit replaced by 'flag' */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: op's type is exactly Element_Type.
   Element_Check: PyObject_TypeCheck against Element_Type (subtypes pass). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700339 if (attrib_str == NULL) {
340 return NULL;
341 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
Miss Islington (bot)62674f32018-12-10 23:05:13 -0800355 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
356 Py_DECREF(attrib);
357 attrib = NULL;
358 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300359 } else {
360 attrib = PyDict_New();
361 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700362
363 Py_DECREF(attrib_str);
364
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700365 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
366 Py_DECREF(attrib);
367 return NULL;
368 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300369 return attrib;
370}
371
Serhiy Storchakacb985562015-05-04 15:32:48 +0300372/*[clinic input]
373module _elementtree
374class _elementtree.Element "ElementObject *" "&Element_Type"
375class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
376class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
377[clinic start generated code]*/
378/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
379
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380static int
381element_init(PyObject *self, PyObject *args, PyObject *kwds)
382{
383 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 PyObject *attrib = NULL;
385 ElementObject *self_elem;
386
387 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
388 return -1;
389
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 if (attrib) {
391 /* attrib passed as positional arg */
392 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 if (!attrib)
394 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 if (kwds) {
396 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200397 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300398 return -1;
399 }
400 }
401 } else if (kwds) {
402 /* have keywords args */
403 attrib = get_attrib_from_keywords(kwds);
404 if (!attrib)
405 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 }
407
408 self_elem = (ElementObject *)self;
409
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 return -1;
414 }
415 }
416
Eli Bendersky48d358b2012-05-30 17:57:50 +0300417 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200418 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419
420 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300422 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300425 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
Eli Bendersky092af1f2012-03-04 07:14:03 +0200427 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300428 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429
430 return 0;
431}
432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200434element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200436 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 PyObject* *children;
438
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700439 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 /* make sure self->children can hold the given number of extra
441 elements. set an exception and return -1 if allocation failed */
442
Victor Stinner5f0af232013-07-11 23:01:36 +0200443 if (!self->extra) {
444 if (create_extra(self, NULL) < 0)
445 return -1;
446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200448 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449
450 if (size > self->extra->allocated) {
451 /* use Python 2.4's list growth strategy */
452 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100454 * which needs at least 4 bytes.
455 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000456 * be safe.
457 */
458 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200459 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
460 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000462 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100463 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 * false alarm always assume at least one child to be safe.
465 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000466 children = PyObject_Realloc(self->extra->children,
467 size * sizeof(PyObject*));
468 if (!children)
469 goto nomemory;
470 } else {
471 children = PyObject_Malloc(size * sizeof(PyObject*));
472 if (!children)
473 goto nomemory;
474 /* copy existing children from static area to malloc buffer */
475 memcpy(children, self->extra->children,
476 self->extra->length * sizeof(PyObject*));
477 }
478 self->extra->children = children;
479 self->extra->allocated = size;
480 }
481
482 return 0;
483
484 nomemory:
485 PyErr_NoMemory();
486 return -1;
487}
488
489LOCAL(int)
490element_add_subelement(ElementObject* self, PyObject* element)
491{
492 /* add a child element to a parent */
493
494 if (element_resize(self, 1) < 0)
495 return -1;
496
497 Py_INCREF(element);
498 self->extra->children[self->extra->length] = element;
499
500 self->extra->length++;
501
502 return 0;
503}
504
505LOCAL(PyObject*)
506element_get_attrib(ElementObject* self)
507{
508 /* return borrowed reference to attrib dictionary */
509 /* note: this function assumes that the extra section exists */
510
511 PyObject* res = self->extra->attrib;
512
513 if (res == Py_None) {
514 /* create missing dictionary */
515 res = PyDict_New();
516 if (!res)
517 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200518 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000519 self->extra->attrib = res;
520 }
521
522 return res;
523}
524
525LOCAL(PyObject*)
526element_get_text(ElementObject* self)
527{
528 /* return borrowed reference to text attribute */
529
Serhiy Storchaka576def02017-03-30 09:47:31 +0300530 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000531
532 if (JOIN_GET(res)) {
533 res = JOIN_OBJ(res);
534 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300535 PyObject *tmp = list_join(res);
536 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300538 self->text = tmp;
539 Py_DECREF(res);
540 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000541 }
542 }
543
544 return res;
545}
546
547LOCAL(PyObject*)
548element_get_tail(ElementObject* self)
549{
550 /* return borrowed reference to text attribute */
551
Serhiy Storchaka576def02017-03-30 09:47:31 +0300552 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553
554 if (JOIN_GET(res)) {
555 res = JOIN_OBJ(res);
556 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300557 PyObject *tmp = list_join(res);
558 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300560 self->tail = tmp;
561 Py_DECREF(res);
562 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 }
564 }
565
566 return res;
567}
568
569static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300570subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000571{
572 PyObject* elem;
573
574 ElementObject* parent;
575 PyObject* tag;
576 PyObject* attrib = NULL;
577 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
578 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800579 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000580 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800581 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582
Eli Bendersky737b1732012-05-29 06:02:56 +0300583 if (attrib) {
584 /* attrib passed as positional arg */
585 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000586 if (!attrib)
587 return NULL;
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700588 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
589 Py_DECREF(attrib);
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 }
592 } else if (kwds) {
593 /* have keyword args */
594 attrib = get_attrib_from_keywords(kwds);
595 if (!attrib)
596 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000597 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300598 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 Py_INCREF(Py_None);
600 attrib = Py_None;
601 }
602
Eli Bendersky092af1f2012-03-04 07:14:03 +0200603 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200605 if (elem == NULL)
606 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000608 if (element_add_subelement(parent, elem) < 0) {
609 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000610 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000611 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612
613 return elem;
614}
615
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616static int
617element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
618{
619 Py_VISIT(self->tag);
620 Py_VISIT(JOIN_OBJ(self->text));
621 Py_VISIT(JOIN_OBJ(self->tail));
622
623 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200624 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300625 Py_VISIT(self->extra->attrib);
626
627 for (i = 0; i < self->extra->length; ++i)
628 Py_VISIT(self->extra->children[i]);
629 }
630 return 0;
631}
632
633static int
634element_gc_clear(ElementObject *self)
635{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700637 _clear_joined_ptr(&self->text);
638 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639
640 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300641 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700643 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300644 return 0;
645}
646
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647static void
648element_dealloc(ElementObject* self)
649{
INADA Naokia6296d32017-08-24 14:55:17 +0900650 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300651 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200652 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300653
654 if (self->weakreflist != NULL)
655 PyObject_ClearWeakRefs((PyObject *) self);
656
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657 /* element_gc_clear clears all references and deallocates extra
658 */
659 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660
661 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200662 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200663 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000664}
665
666/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668/*[clinic input]
669_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670
Serhiy Storchakacb985562015-05-04 15:32:48 +0300671 subelement: object(subclass_of='&Element_Type')
672 /
673
674[clinic start generated code]*/
675
676static PyObject *
677_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
678/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
679{
680 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681 return NULL;
682
683 Py_RETURN_NONE;
684}
685
Serhiy Storchakacb985562015-05-04 15:32:48 +0300686/*[clinic input]
687_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000688
Serhiy Storchakacb985562015-05-04 15:32:48 +0300689[clinic start generated code]*/
690
691static PyObject *
692_elementtree_Element_clear_impl(ElementObject *self)
693/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
694{
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700695 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696
697 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300698 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300701 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.__copy__
708
709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element___copy___impl(ElementObject *self)
713/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200715 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 ElementObject* element;
717
Eli Bendersky092af1f2012-03-04 07:14:03 +0200718 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800719 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720 if (!element)
721 return NULL;
722
Oren Milman39ecb9c2017-10-10 23:26:24 +0300723 Py_INCREF(JOIN_OBJ(self->text));
724 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725
Oren Milman39ecb9c2017-10-10 23:26:24 +0300726 Py_INCREF(JOIN_OBJ(self->tail));
727 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700729 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000731 if (element_resize(element, self->extra->length) < 0) {
732 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000733 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000734 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000735
736 for (i = 0; i < self->extra->length; i++) {
737 Py_INCREF(self->extra->children[i]);
738 element->extra->children[i] = self->extra->children[i];
739 }
740
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700741 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 }
744
745 return (PyObject*) element;
746}
747
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200748/* Helper for a deep copy. */
749LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
Oren Milmand0568182017-09-12 17:39:15 +0300754 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300755 /
756
757[clinic start generated code]*/
758
759static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300760_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300797 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 tail = deepcopy(JOIN_OBJ(self->tail), memo);
800 if (!tail)
801 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300802 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700804 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806 if (element_resize(element, self->extra->length) < 0)
807 goto error;
808
809 for (i = 0; i < self->extra->length; i++) {
810 PyObject* child = deepcopy(self->extra->children[i], memo);
811 if (!child) {
812 element->extra->length = i;
813 goto error;
814 }
815 element->extra->children[i] = child;
816 }
817
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700818 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700823 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200841LOCAL(PyObject *)
842deepcopy(PyObject *object, PyObject *memo)
843{
844 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200845 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200846 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200847
848 /* Fast paths */
849 if (object == Py_None || PyUnicode_CheckExact(object)) {
850 Py_INCREF(object);
851 return object;
852 }
853
854 if (Py_REFCNT(object) == 1) {
855 if (PyDict_CheckExact(object)) {
856 PyObject *key, *value;
857 Py_ssize_t pos = 0;
858 int simple = 1;
859 while (PyDict_Next(object, &pos, &key, &value)) {
860 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
861 simple = 0;
862 break;
863 }
864 }
865 if (simple)
866 return PyDict_Copy(object);
867 /* Fall through to general case */
868 }
869 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300870 return _elementtree_Element___deepcopy___impl(
871 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200872 }
873 }
874
875 /* General case */
876 st = ET_STATE_GLOBAL;
877 if (!st->deepcopy_obj) {
878 PyErr_SetString(PyExc_RuntimeError,
879 "deepcopy helper not found");
880 return NULL;
881 }
882
Victor Stinner7fbac452016-08-20 01:34:44 +0200883 stack[0] = object;
884 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200885 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200886}
887
888
Serhiy Storchakacb985562015-05-04 15:32:48 +0300889/*[clinic input]
890_elementtree.Element.__sizeof__ -> Py_ssize_t
891
892[clinic start generated code]*/
893
894static Py_ssize_t
895_elementtree_Element___sizeof___impl(ElementObject *self)
896/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200897{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200898 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200899 if (self->extra) {
900 result += sizeof(ElementObjectExtra);
901 if (self->extra->children != self->extra->_children)
902 result += sizeof(PyObject*) * self->extra->allocated;
903 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300904 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200905}
906
/* dict keys for getstate/setstate.  __getstate__ uses them as the keys of
   the state dict it builds; element_setstate_from_Python uses them as the
   keyword names it parses out of the state dict. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
913
914/* __getstate__ returns a fabricated instance dict as in the pure-Python
915 * Element implementation, for interoperability/interchangeability. This
916 * makes the pure-Python implementation details an API, but (a) there aren't
917 * any unnecessary structures there; and (b) it buys compatibility with 3.2
918 * pickles. See issue #16076.
919 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300920/*[clinic input]
921_elementtree.Element.__getstate__
922
923[clinic start generated code]*/
924
Eli Bendersky698bdb22013-01-10 06:01:06 -0800925static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300926_elementtree_Element___getstate___impl(ElementObject *self)
927/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800928{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200929 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800930 PyObject *instancedict = NULL, *children;
931
932 /* Build a list of children. */
933 children = PyList_New(self->extra ? self->extra->length : 0);
934 if (!children)
935 return NULL;
936 for (i = 0; i < PyList_GET_SIZE(children); i++) {
937 PyObject *child = self->extra->children[i];
938 Py_INCREF(child);
939 PyList_SET_ITEM(children, i, child);
940 }
941
942 /* Construct the state object. */
943 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
944 if (noattrib)
945 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
946 PICKLED_TAG, self->tag,
947 PICKLED_CHILDREN, children,
948 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700949 PICKLED_TEXT, JOIN_OBJ(self->text),
950 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800951 else
952 instancedict = Py_BuildValue("{sOsOsOsOsO}",
953 PICKLED_TAG, self->tag,
954 PICKLED_CHILDREN, children,
955 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700956 PICKLED_TEXT, JOIN_OBJ(self->text),
957 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800958 if (instancedict) {
959 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800960 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800961 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800962 else {
963 for (i = 0; i < PyList_GET_SIZE(children); i++)
964 Py_DECREF(PyList_GET_ITEM(children, i));
965 Py_DECREF(children);
966
967 return NULL;
968 }
969}
970
971static PyObject *
972element_setstate_from_attributes(ElementObject *self,
973 PyObject *tag,
974 PyObject *attrib,
975 PyObject *text,
976 PyObject *tail,
977 PyObject *children)
978{
979 Py_ssize_t i, nchildren;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700980 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800981
982 if (!tag) {
983 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
984 return NULL;
985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200987 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300988 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800989
Oren Milman39ecb9c2017-10-10 23:26:24 +0300990 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
991 Py_INCREF(JOIN_OBJ(text));
992 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800993
Oren Milman39ecb9c2017-10-10 23:26:24 +0300994 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
995 Py_INCREF(JOIN_OBJ(tail));
996 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800997
998 /* Handle ATTRIB and CHILDREN. */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700999 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001000 Py_RETURN_NONE;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001001 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001002
1003 /* Compute 'nchildren'. */
1004 if (children) {
1005 if (!PyList_Check(children)) {
1006 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1007 return NULL;
1008 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001009 nchildren = PyList_GET_SIZE(children);
1010
1011 /* (Re-)allocate 'extra'.
1012 Avoid DECREFs calling into this code again (cycles, etc.)
1013 */
1014 oldextra = self->extra;
1015 self->extra = NULL;
1016 if (element_resize(self, nchildren)) {
1017 assert(!self->extra || !self->extra->length);
1018 clear_extra(self);
1019 self->extra = oldextra;
1020 return NULL;
1021 }
1022 assert(self->extra);
1023 assert(self->extra->allocated >= nchildren);
1024 if (oldextra) {
1025 assert(self->extra->attrib == Py_None);
1026 self->extra->attrib = oldextra->attrib;
1027 oldextra->attrib = Py_None;
1028 }
1029
1030 /* Copy children */
1031 for (i = 0; i < nchildren; i++) {
1032 self->extra->children[i] = PyList_GET_ITEM(children, i);
1033 Py_INCREF(self->extra->children[i]);
1034 }
1035
1036 assert(!self->extra->length);
1037 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001038 }
1039 else {
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001040 if (element_resize(self, 0)) {
1041 return NULL;
1042 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001043 }
1044
Eli Bendersky698bdb22013-01-10 06:01:06 -08001045 /* Stash attrib. */
1046 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001047 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001048 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001049 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001050 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001051
1052 Py_RETURN_NONE;
1053}
1054
1055/* __setstate__ for Element instance from the Python implementation.
1056 * 'state' should be the instance dict.
1057 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001058
Eli Bendersky698bdb22013-01-10 06:01:06 -08001059static PyObject *
1060element_setstate_from_Python(ElementObject *self, PyObject *state)
1061{
1062 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1063 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1064 PyObject *args;
1065 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001066 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001067
Eli Bendersky698bdb22013-01-10 06:01:06 -08001068 tag = attrib = text = tail = children = NULL;
1069 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001070 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001071 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001072
1073 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1074 &attrib, &text, &tail, &children))
1075 retval = element_setstate_from_attributes(self, tag, attrib, text,
1076 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001077 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001078 retval = NULL;
1079
1080 Py_DECREF(args);
1081 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.__setstate__
1086
1087 state: object
1088 /
1089
1090[clinic start generated code]*/
1091
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001093_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1094/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001095{
1096 if (!PyDict_CheckExact(state)) {
1097 PyErr_Format(PyExc_TypeError,
1098 "Don't know how to unpickle \"%.200R\" as an Element",
1099 state);
1100 return NULL;
1101 }
1102 else
1103 return element_setstate_from_Python(self, state);
1104}
1105
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106LOCAL(int)
1107checkpath(PyObject* tag)
1108{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001109 Py_ssize_t i;
1110 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111
1112 /* check if a tag contains an xpath character */
1113
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001114#define PATHCHAR(ch) \
1115 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1119 void *data = PyUnicode_DATA(tag);
1120 unsigned int kind = PyUnicode_KIND(tag);
1121 for (i = 0; i < len; i++) {
1122 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1123 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001124 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001128 return 1;
1129 }
1130 return 0;
1131 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001132 if (PyBytes_Check(tag)) {
1133 char *p = PyBytes_AS_STRING(tag);
1134 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135 if (p[i] == '{')
1136 check = 0;
1137 else if (p[i] == '}')
1138 check = 1;
1139 else if (check && PATHCHAR(p[i]))
1140 return 1;
1141 }
1142 return 0;
1143 }
1144
1145 return 1; /* unknown type; might be path expression */
1146}
1147
Serhiy Storchakacb985562015-05-04 15:32:48 +03001148/*[clinic input]
1149_elementtree.Element.extend
1150
1151 elements: object
1152 /
1153
1154[clinic start generated code]*/
1155
1156static PyObject *
1157_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1158/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159{
1160 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001161 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001162
Serhiy Storchakacb985562015-05-04 15:32:48 +03001163 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001164 if (!seq) {
1165 PyErr_Format(
1166 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001167 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001168 );
1169 return NULL;
1170 }
1171
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001172 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001173 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001174 Py_INCREF(element);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001175 if (!Element_Check(element)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001176 PyErr_Format(
1177 PyExc_TypeError,
1178 "expected an Element, not \"%.200s\"",
1179 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001180 Py_DECREF(seq);
1181 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001182 return NULL;
1183 }
1184
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001185 if (element_add_subelement(self, element) < 0) {
1186 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001187 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188 return NULL;
1189 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001191 }
1192
1193 Py_DECREF(seq);
1194
1195 Py_RETURN_NONE;
1196}
1197
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198/*[clinic input]
1199_elementtree.Element.find
1200
1201 path: object
1202 namespaces: object = None
1203
1204[clinic start generated code]*/
1205
1206static PyObject *
1207_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1208 PyObject *namespaces)
1209/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001210{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001211 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001212 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001213
Serhiy Storchakacb985562015-05-04 15:32:48 +03001214 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001215 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001216 return _PyObject_CallMethodIdObjArgs(
1217 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001219 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220
1221 if (!self->extra)
1222 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 for (i = 0; i < self->extra->length; i++) {
1225 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001226 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001227 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001228 continue;
1229 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001230 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001231 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001233 Py_DECREF(item);
1234 if (rc < 0)
1235 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 }
1237
1238 Py_RETURN_NONE;
1239}
1240
Serhiy Storchakacb985562015-05-04 15:32:48 +03001241/*[clinic input]
1242_elementtree.Element.findtext
1243
1244 path: object
1245 default: object = None
1246 namespaces: object = None
1247
1248[clinic start generated code]*/
1249
1250static PyObject *
1251_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1252 PyObject *default_value,
1253 PyObject *namespaces)
1254/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001255{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001256 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001257 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001258 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001259
Serhiy Storchakacb985562015-05-04 15:32:48 +03001260 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001261 return _PyObject_CallMethodIdObjArgs(
1262 st->elementpath_obj, &PyId_findtext,
1263 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264 );
1265
1266 if (!self->extra) {
1267 Py_INCREF(default_value);
1268 return default_value;
1269 }
1270
1271 for (i = 0; i < self->extra->length; i++) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001272 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001273 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001274 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001275 continue;
1276 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001277 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001278 if (rc > 0) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001279 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001280 if (text == Py_None) {
1281 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001282 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001283 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001284 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001285 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286 return text;
1287 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001288 Py_DECREF(item);
1289 if (rc < 0)
1290 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001291 }
1292
1293 Py_INCREF(default_value);
1294 return default_value;
1295}
1296
Serhiy Storchakacb985562015-05-04 15:32:48 +03001297/*[clinic input]
1298_elementtree.Element.findall
1299
1300 path: object
1301 namespaces: object = None
1302
1303[clinic start generated code]*/
1304
1305static PyObject *
1306_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1307 PyObject *namespaces)
1308/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001309{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001310 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001311 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001312 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001313
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001314 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001315 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001316 return _PyObject_CallMethodIdObjArgs(
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001317 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001319 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320
1321 out = PyList_New(0);
1322 if (!out)
1323 return NULL;
1324
1325 if (!self->extra)
1326 return out;
1327
1328 for (i = 0; i < self->extra->length; i++) {
1329 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001330 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001331 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001332 continue;
1333 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001334 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001335 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1336 Py_DECREF(item);
1337 Py_DECREF(out);
1338 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001339 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001340 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341 }
1342
1343 return out;
1344}
1345
Serhiy Storchakacb985562015-05-04 15:32:48 +03001346/*[clinic input]
1347_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001348
Serhiy Storchakacb985562015-05-04 15:32:48 +03001349 path: object
1350 namespaces: object = None
1351
1352[clinic start generated code]*/
1353
1354static PyObject *
1355_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1356 PyObject *namespaces)
1357/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1358{
1359 PyObject* tag = path;
1360 _Py_IDENTIFIER(iterfind);
1361 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001362
Victor Stinnerf5616342016-12-09 15:26:00 +01001363 return _PyObject_CallMethodIdObjArgs(
1364 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001365}
1366
Serhiy Storchakacb985562015-05-04 15:32:48 +03001367/*[clinic input]
1368_elementtree.Element.get
1369
1370 key: object
1371 default: object = None
1372
1373[clinic start generated code]*/
1374
1375static PyObject *
1376_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1377 PyObject *default_value)
1378/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001379{
1380 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001381
1382 if (!self->extra || self->extra->attrib == Py_None)
1383 value = default_value;
1384 else {
1385 value = PyDict_GetItem(self->extra->attrib, key);
1386 if (!value)
1387 value = default_value;
1388 }
1389
1390 Py_INCREF(value);
1391 return value;
1392}
1393
Serhiy Storchakacb985562015-05-04 15:32:48 +03001394/*[clinic input]
1395_elementtree.Element.getchildren
1396
1397[clinic start generated code]*/
1398
1399static PyObject *
1400_elementtree_Element_getchildren_impl(ElementObject *self)
1401/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001403 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404 PyObject* list;
1405
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001406 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1407 "This method will be removed in future versions. "
1408 "Use 'list(elem)' or iteration over elem instead.",
1409 1) < 0) {
1410 return NULL;
1411 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001413 if (!self->extra)
1414 return PyList_New(0);
1415
1416 list = PyList_New(self->extra->length);
1417 if (!list)
1418 return NULL;
1419
1420 for (i = 0; i < self->extra->length; i++) {
1421 PyObject* item = self->extra->children[i];
1422 Py_INCREF(item);
1423 PyList_SET_ITEM(list, i, item);
1424 }
1425
1426 return list;
1427}
1428
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001429
/* Forward declaration; called by the iter() and itertext()
   implementations that follow. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1432
1433
Serhiy Storchakacb985562015-05-04 15:32:48 +03001434/*[clinic input]
1435_elementtree.Element.iter
1436
1437 tag: object = None
1438
1439[clinic start generated code]*/
1440
Eli Bendersky64d11e62012-06-15 07:42:50 +03001441static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001442_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1443/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001444{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001445 if (PyUnicode_Check(tag)) {
1446 if (PyUnicode_READY(tag) < 0)
1447 return NULL;
1448 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1449 tag = Py_None;
1450 }
1451 else if (PyBytes_Check(tag)) {
1452 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1453 tag = Py_None;
1454 }
1455
Eli Bendersky64d11e62012-06-15 07:42:50 +03001456 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001457}
1458
1459
Serhiy Storchakacb985562015-05-04 15:32:48 +03001460/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001461_elementtree.Element.getiterator
1462
1463 tag: object = None
1464
1465[clinic start generated code]*/
1466
1467static PyObject *
1468_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1469/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1470{
1471 /* Change for a DeprecationWarning in 1.4 */
1472 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1473 "This method will be removed in future versions. "
1474 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1475 1) < 0) {
1476 return NULL;
1477 }
1478 return _elementtree_Element_iter_impl(self, tag);
1479}
1480
1481
1482/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001483_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001484
Serhiy Storchakacb985562015-05-04 15:32:48 +03001485[clinic start generated code]*/
1486
1487static PyObject *
1488_elementtree_Element_itertext_impl(ElementObject *self)
1489/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1490{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001491 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492}
1493
Eli Bendersky64d11e62012-06-15 07:42:50 +03001494
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001496element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001498 ElementObject* self = (ElementObject*) self_;
1499
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001500 if (!self->extra || index < 0 || index >= self->extra->length) {
1501 PyErr_SetString(
1502 PyExc_IndexError,
1503 "child index out of range"
1504 );
1505 return NULL;
1506 }
1507
1508 Py_INCREF(self->extra->children[index]);
1509 return self->extra->children[index];
1510}
1511
Serhiy Storchakacb985562015-05-04 15:32:48 +03001512/*[clinic input]
1513_elementtree.Element.insert
1514
1515 index: Py_ssize_t
1516 subelement: object(subclass_of='&Element_Type')
1517 /
1518
1519[clinic start generated code]*/
1520
1521static PyObject *
1522_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1523 PyObject *subelement)
1524/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001526 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527
Victor Stinner5f0af232013-07-11 23:01:36 +02001528 if (!self->extra) {
1529 if (create_extra(self, NULL) < 0)
1530 return NULL;
1531 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001533 if (index < 0) {
1534 index += self->extra->length;
1535 if (index < 0)
1536 index = 0;
1537 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001538 if (index > self->extra->length)
1539 index = self->extra->length;
1540
1541 if (element_resize(self, 1) < 0)
1542 return NULL;
1543
1544 for (i = self->extra->length; i > index; i--)
1545 self->extra->children[i] = self->extra->children[i-1];
1546
Serhiy Storchakacb985562015-05-04 15:32:48 +03001547 Py_INCREF(subelement);
1548 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549
1550 self->extra->length++;
1551
1552 Py_RETURN_NONE;
1553}
1554
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555/*[clinic input]
1556_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557
Serhiy Storchakacb985562015-05-04 15:32:48 +03001558[clinic start generated code]*/
1559
1560static PyObject *
1561_elementtree_Element_items_impl(ElementObject *self)
1562/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1563{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564 if (!self->extra || self->extra->attrib == Py_None)
1565 return PyList_New(0);
1566
1567 return PyDict_Items(self->extra->attrib);
1568}
1569
Serhiy Storchakacb985562015-05-04 15:32:48 +03001570/*[clinic input]
1571_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572
Serhiy Storchakacb985562015-05-04 15:32:48 +03001573[clinic start generated code]*/
1574
1575static PyObject *
1576_elementtree_Element_keys_impl(ElementObject *self)
1577/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1578{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579 if (!self->extra || self->extra->attrib == Py_None)
1580 return PyList_New(0);
1581
1582 return PyDict_Keys(self->extra->attrib);
1583}
1584
Martin v. Löwis18e16552006-02-15 17:27:45 +00001585static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586element_length(ElementObject* self)
1587{
1588 if (!self->extra)
1589 return 0;
1590
1591 return self->extra->length;
1592}
1593
Serhiy Storchakacb985562015-05-04 15:32:48 +03001594/*[clinic input]
1595_elementtree.Element.makeelement
1596
1597 tag: object
1598 attrib: object
1599 /
1600
1601[clinic start generated code]*/
1602
1603static PyObject *
1604_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1605 PyObject *attrib)
1606/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607{
1608 PyObject* elem;
1609
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 attrib = PyDict_Copy(attrib);
1611 if (!attrib)
1612 return NULL;
1613
Eli Bendersky092af1f2012-03-04 07:14:03 +02001614 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615
1616 Py_DECREF(attrib);
1617
1618 return elem;
1619}
1620
Serhiy Storchakacb985562015-05-04 15:32:48 +03001621/*[clinic input]
1622_elementtree.Element.remove
1623
1624 subelement: object(subclass_of='&Element_Type')
1625 /
1626
1627[clinic start generated code]*/
1628
1629static PyObject *
1630_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1631/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001633 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001634 int rc;
1635 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637 if (!self->extra) {
1638 /* element has no children, so raise exception */
1639 PyErr_SetString(
1640 PyExc_ValueError,
1641 "list.remove(x): x not in list"
1642 );
1643 return NULL;
1644 }
1645
1646 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001647 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001648 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001649 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001650 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001651 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001652 if (rc < 0)
1653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001654 }
1655
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001656 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001657 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658 PyErr_SetString(
1659 PyExc_ValueError,
1660 "list.remove(x): x not in list"
1661 );
1662 return NULL;
1663 }
1664
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001665 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666
1667 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668 for (; i < self->extra->length; i++)
1669 self->extra->children[i] = self->extra->children[i+1];
1670
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001671 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672 Py_RETURN_NONE;
1673}
1674
1675static PyObject*
1676element_repr(ElementObject* self)
1677{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001678 int status;
1679
1680 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001681 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001682
1683 status = Py_ReprEnter((PyObject *)self);
1684 if (status == 0) {
1685 PyObject *res;
1686 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1687 Py_ReprLeave((PyObject *)self);
1688 return res;
1689 }
1690 if (status > 0)
1691 PyErr_Format(PyExc_RuntimeError,
1692 "reentrant call inside %s.__repr__",
1693 Py_TYPE(self)->tp_name);
1694 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695}
1696
Serhiy Storchakacb985562015-05-04 15:32:48 +03001697/*[clinic input]
1698_elementtree.Element.set
1699
1700 key: object
1701 value: object
1702 /
1703
1704[clinic start generated code]*/
1705
1706static PyObject *
1707_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1708 PyObject *value)
1709/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001710{
1711 PyObject* attrib;
1712
Victor Stinner5f0af232013-07-11 23:01:36 +02001713 if (!self->extra) {
1714 if (create_extra(self, NULL) < 0)
1715 return NULL;
1716 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001717
1718 attrib = element_get_attrib(self);
1719 if (!attrib)
1720 return NULL;
1721
1722 if (PyDict_SetItem(attrib, key, value) < 0)
1723 return NULL;
1724
1725 Py_RETURN_NONE;
1726}
1727
1728static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001729element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001730{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001731 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001732 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733 PyObject* old;
1734
1735 if (!self->extra || index < 0 || index >= self->extra->length) {
1736 PyErr_SetString(
1737 PyExc_IndexError,
1738 "child assignment index out of range");
1739 return -1;
1740 }
1741
1742 old = self->extra->children[index];
1743
1744 if (item) {
1745 Py_INCREF(item);
1746 self->extra->children[index] = item;
1747 } else {
1748 self->extra->length--;
1749 for (i = index; i < self->extra->length; i++)
1750 self->extra->children[i] = self->extra->children[i+1];
1751 }
1752
1753 Py_DECREF(old);
1754
1755 return 0;
1756}
1757
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001758static PyObject*
1759element_subscr(PyObject* self_, PyObject* item)
1760{
1761 ElementObject* self = (ElementObject*) self_;
1762
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001763 if (PyIndex_Check(item)) {
1764 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001765
1766 if (i == -1 && PyErr_Occurred()) {
1767 return NULL;
1768 }
1769 if (i < 0 && self->extra)
1770 i += self->extra->length;
1771 return element_getitem(self_, i);
1772 }
1773 else if (PySlice_Check(item)) {
Miss Islington (bot)f02d1a42019-05-17 00:33:10 -07001774 Py_ssize_t start, stop, step, slicelen, i;
1775 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001776 PyObject* list;
1777
1778 if (!self->extra)
1779 return PyList_New(0);
1780
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001781 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782 return NULL;
1783 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001784 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1785 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001786
1787 if (slicelen <= 0)
1788 return PyList_New(0);
1789 else {
1790 list = PyList_New(slicelen);
1791 if (!list)
1792 return NULL;
1793
1794 for (cur = start, i = 0; i < slicelen;
1795 cur += step, i++) {
1796 PyObject* item = self->extra->children[cur];
1797 Py_INCREF(item);
1798 PyList_SET_ITEM(list, i, item);
1799 }
1800
1801 return list;
1802 }
1803 }
1804 else {
1805 PyErr_SetString(PyExc_TypeError,
1806 "element indices must be integers");
1807 return NULL;
1808 }
1809}
1810
1811static int
1812element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1813{
1814 ElementObject* self = (ElementObject*) self_;
1815
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001816 if (PyIndex_Check(item)) {
1817 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001818
1819 if (i == -1 && PyErr_Occurred()) {
1820 return -1;
1821 }
1822 if (i < 0 && self->extra)
1823 i += self->extra->length;
1824 return element_setitem(self_, i, value);
1825 }
1826 else if (PySlice_Check(item)) {
Miss Islington (bot)f02d1a42019-05-17 00:33:10 -07001827 Py_ssize_t start, stop, step, slicelen, newlen, i;
1828 size_t cur;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001829
1830 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001831 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001832
Victor Stinner5f0af232013-07-11 23:01:36 +02001833 if (!self->extra) {
1834 if (create_extra(self, NULL) < 0)
1835 return -1;
1836 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001837
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001838 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001839 return -1;
1840 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001841 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1842 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001843
Eli Bendersky865756a2012-03-09 13:38:15 +02001844 if (value == NULL) {
1845 /* Delete slice */
1846 size_t cur;
1847 Py_ssize_t i;
1848
1849 if (slicelen <= 0)
1850 return 0;
1851
1852 /* Since we're deleting, the direction of the range doesn't matter,
1853 * so for simplicity make it always ascending.
1854 */
1855 if (step < 0) {
1856 stop = start + 1;
1857 start = stop + step * (slicelen - 1) - 1;
1858 step = -step;
1859 }
1860
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001861 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001862
1863 /* recycle is a list that will contain all the children
1864 * scheduled for removal.
1865 */
1866 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001867 return -1;
1868 }
1869
1870 /* This loop walks over all the children that have to be deleted,
1871 * with cur pointing at them. num_moved is the amount of children
1872 * until the next deleted child that have to be "shifted down" to
1873 * occupy the deleted's places.
1874 * Note that in the ith iteration, shifting is done i+i places down
1875 * because i children were already removed.
1876 */
1877 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1878 /* Compute how many children have to be moved, clipping at the
1879 * list end.
1880 */
1881 Py_ssize_t num_moved = step - 1;
1882 if (cur + step >= (size_t)self->extra->length) {
1883 num_moved = self->extra->length - cur - 1;
1884 }
1885
1886 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1887
1888 memmove(
1889 self->extra->children + cur - i,
1890 self->extra->children + cur + 1,
1891 num_moved * sizeof(PyObject *));
1892 }
1893
1894 /* Leftover "tail" after the last removed child */
1895 cur = start + (size_t)slicelen * step;
1896 if (cur < (size_t)self->extra->length) {
1897 memmove(
1898 self->extra->children + cur - slicelen,
1899 self->extra->children + cur,
1900 (self->extra->length - cur) * sizeof(PyObject *));
1901 }
1902
1903 self->extra->length -= slicelen;
1904
1905 /* Discard the recycle list with all the deleted sub-elements */
Miss Islington (bot)c46f0422018-10-23 12:45:44 -07001906 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001907 return 0;
1908 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001909
1910 /* A new slice is actually being assigned */
1911 seq = PySequence_Fast(value, "");
1912 if (!seq) {
1913 PyErr_Format(
1914 PyExc_TypeError,
1915 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1916 );
1917 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001918 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001919 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001920
1921 if (step != 1 && newlen != slicelen)
1922 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001923 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001924 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001925 "attempt to assign sequence of size %zd "
1926 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001927 newlen, slicelen
1928 );
1929 return -1;
1930 }
1931
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001932 /* Resize before creating the recycle bin, to prevent refleaks. */
1933 if (newlen > slicelen) {
1934 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001935 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001936 return -1;
1937 }
1938 }
1939
1940 if (slicelen > 0) {
1941 /* to avoid recursive calls to this method (via decref), move
1942 old items to the recycle bin here, and get rid of them when
1943 we're done modifying the element */
1944 recycle = PyList_New(slicelen);
1945 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001946 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001947 return -1;
1948 }
1949 for (cur = start, i = 0; i < slicelen;
1950 cur += step, i++)
1951 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1952 }
1953
1954 if (newlen < slicelen) {
1955 /* delete slice */
1956 for (i = stop; i < self->extra->length; i++)
1957 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1958 } else if (newlen > slicelen) {
1959 /* insert slice */
1960 for (i = self->extra->length-1; i >= stop; i--)
1961 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1962 }
1963
1964 /* replace the slice */
1965 for (cur = start, i = 0; i < newlen;
1966 cur += step, i++) {
1967 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1968 Py_INCREF(element);
1969 self->extra->children[cur] = element;
1970 }
1971
1972 self->extra->length += newlen - slicelen;
1973
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001974 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001975
1976 /* discard the recycle bin, and everything in it */
1977 Py_XDECREF(recycle);
1978
1979 return 0;
1980 }
1981 else {
1982 PyErr_SetString(PyExc_TypeError,
1983 "element indices must be integers");
1984 return -1;
1985 }
1986}
1987
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001988static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001989element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001990{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001991 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001992 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001993 return res;
1994}
1995
Serhiy Storchakadde08152015-11-25 15:28:13 +02001996static PyObject*
1997element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001998{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001999 PyObject *res = element_get_text(self);
2000 Py_XINCREF(res);
2001 return res;
2002}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002003
Serhiy Storchakadde08152015-11-25 15:28:13 +02002004static PyObject*
2005element_tail_getter(ElementObject *self, void *closure)
2006{
2007 PyObject *res = element_get_tail(self);
2008 Py_XINCREF(res);
2009 return res;
2010}
2011
2012static PyObject*
2013element_attrib_getter(ElementObject *self, void *closure)
2014{
2015 PyObject *res;
2016 if (!self->extra) {
2017 if (create_extra(self, NULL) < 0)
2018 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002019 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002020 res = element_get_attrib(self);
2021 Py_XINCREF(res);
2022 return res;
2023}
Victor Stinner4d463432013-07-11 23:05:03 +02002024
/* macro for setter validation: a NULL value means "delete the attribute",
   which is not supported, so set AttributeError and return -1 from the
   enclosing setter.  Wrapped in do { } while (0) so that it expands to a
   single statement and cannot capture a following "else". */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2033
/* Setter for the "tag" attribute.  Deleting the attribute (value == NULL) is
   rejected by _VALIDATE_ATTR_VALUE; otherwise the element takes a new
   reference to value and releases the previous tag.  Returns 0 on success. */
static int
element_tag_setter(ElementObject *self, PyObject *value, void *closure)
{
    _VALIDATE_ATTR_VALUE(value);
    Py_INCREF(value);
    Py_SETREF(self->tag, value);
    return 0;
}
2042
/* Setter for the "text" attribute.  Deleting the attribute (value == NULL) is
   rejected by _VALIDATE_ATTR_VALUE.  Returns 0 on success. */
static int
element_text_setter(ElementObject *self, PyObject *value, void *closure)
{
    _VALIDATE_ATTR_VALUE(value);
    /* NOTE(review): presumably _set_joined_ptr() takes over this reference
       and releases the old text -- confirm against its definition. */
    Py_INCREF(value);
    _set_joined_ptr(&self->text, value);
    return 0;
}
2051
/* Setter for the "tail" attribute.  Deleting the attribute (value == NULL) is
   rejected by _VALIDATE_ATTR_VALUE.  Returns 0 on success. */
static int
element_tail_setter(ElementObject *self, PyObject *value, void *closure)
{
    _VALIDATE_ATTR_VALUE(value);
    /* NOTE(review): presumably _set_joined_ptr() takes over this reference
       and releases the old tail -- confirm against its definition. */
    Py_INCREF(value);
    _set_joined_ptr(&self->tail, value);
    return 0;
}
2060
2061static int
2062element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2063{
2064 _VALIDATE_ATTR_VALUE(value);
2065 if (!self->extra) {
2066 if (create_extra(self, NULL) < 0)
2067 return -1;
2068 }
2069 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002070 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002071 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002072}
2073
/* Sequence protocol slots for Element: length, item access and item
   assignment/deletion.  Unused slots are left as 0. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,           /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                    /* sq_item */
    0,                                  /* was_sq_slice */
    element_setitem,                    /* sq_ass_item */
    0,                                  /* was_sq_ass_slice */
};
2083
Eli Bendersky64d11e62012-06-15 07:42:50 +03002084/******************************* Element iterator ****************************/
2085
2086/* ElementIterObject represents the iteration state over an XML element in
2087 * pre-order traversal. To keep track of which sub-element should be returned
2088 * next, a stack of parents is maintained. This is a standard stack-based
2089 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2091 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002092 * the current one is exhausted, and the next child to examine in that parent.
2093 */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked */
    Py_ssize_t child_index;     /* index of the next child to examine */
} ParentLocator;
2098
/* State of one iteration over an element tree (see elementiter_next). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of saved parents */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* start element; NULL once consumed */
    PyObject *sought_tag;           /* tag to match; Py_None matches all */
    int gettext;                    /* non-zero: yield text, not elements */
} ElementIterObject;
2108
2109
2110static void
2111elementiter_dealloc(ElementIterObject *it)
2112{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002113 Py_ssize_t i = it->parent_stack_used;
2114 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002115 /* bpo-31095: UnTrack is needed before calling any callbacks */
2116 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002117 while (i--)
2118 Py_XDECREF(it->parent_stack[i].parent);
2119 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002120
2121 Py_XDECREF(it->sought_tag);
2122 Py_XDECREF(it->root_element);
2123
Eli Bendersky64d11e62012-06-15 07:42:50 +03002124 PyObject_GC_Del(it);
2125}
2126
2127static int
2128elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2129{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002130 Py_ssize_t i = it->parent_stack_used;
2131 while (i--)
2132 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002133
2134 Py_VISIT(it->root_element);
2135 Py_VISIT(it->sought_tag);
2136 return 0;
2137}
2138
2139/* Helper function for elementiter_next. Add a new parent to the parent stack.
2140 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002141static int
2142parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002143{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002144 ParentLocator *item;
2145
2146 if (it->parent_stack_used >= it->parent_stack_size) {
2147 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2148 ParentLocator *parent_stack = it->parent_stack;
2149 PyMem_Resize(parent_stack, ParentLocator, new_size);
2150 if (parent_stack == NULL)
2151 return -1;
2152 it->parent_stack = parent_stack;
2153 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002155 item = it->parent_stack + it->parent_stack_used++;
2156 Py_INCREF(parent);
2157 item->parent = parent;
2158 item->child_index = 0;
2159 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160}
2161
2162static PyObject *
2163elementiter_next(ElementIterObject *it)
2164{
2165 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002166 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002167 * A short note on gettext: this function serves both the iter() and
2168 * itertext() methods to avoid code duplication. However, there are a few
2169 * small differences in the way these iterations work. Namely:
2170 * - itertext() only yields text from nodes that have it, and continues
2171 * iterating when a node doesn't have text (so it doesn't return any
2172 * node like iter())
2173 * - itertext() also has to handle tail, after finishing with all the
2174 * children of a node.
2175 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002176 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002177 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002178 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002179
2180 while (1) {
2181 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002182 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002183 * iterator is exhausted.
2184 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002185 if (!it->parent_stack_used) {
2186 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002187 PyErr_SetNone(PyExc_StopIteration);
2188 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002189 }
2190
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002191 elem = it->root_element; /* steals a reference */
2192 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002193 }
2194 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002195 /* See if there are children left to traverse in the current parent. If
2196 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002197 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002198 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2199 Py_ssize_t child_index = item->child_index;
2200 ElementObjectExtra *extra;
2201 elem = item->parent;
2202 extra = elem->extra;
2203 if (!extra || child_index >= extra->length) {
2204 it->parent_stack_used--;
2205 /* Note that extra condition on it->parent_stack_used here;
2206 * this is because itertext() is supposed to only return *inner*
2207 * text, not text following the element it began iteration with.
2208 */
2209 if (it->gettext && it->parent_stack_used) {
2210 text = element_get_tail(elem);
2211 goto gettext;
2212 }
2213 Py_DECREF(elem);
2214 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002215 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002216
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07002217 if (!Element_Check(extra->children[child_index])) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002218 PyErr_Format(PyExc_AttributeError,
2219 "'%.100s' object has no attribute 'iter'",
2220 Py_TYPE(extra->children[child_index])->tp_name);
2221 return NULL;
2222 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002223 elem = (ElementObject *)extra->children[child_index];
2224 item->child_index++;
2225 Py_INCREF(elem);
2226 }
2227
2228 if (parent_stack_push_new(it, elem) < 0) {
2229 Py_DECREF(elem);
2230 PyErr_NoMemory();
2231 return NULL;
2232 }
2233 if (it->gettext) {
2234 text = element_get_text(elem);
2235 goto gettext;
2236 }
2237
2238 if (it->sought_tag == Py_None)
2239 return (PyObject *)elem;
2240
2241 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2242 if (rc > 0)
2243 return (PyObject *)elem;
2244
2245 Py_DECREF(elem);
2246 if (rc < 0)
2247 return NULL;
2248 continue;
2249
2250gettext:
2251 if (!text) {
2252 Py_DECREF(elem);
2253 return NULL;
2254 }
2255 if (text == Py_None) {
2256 Py_DECREF(elem);
2257 }
2258 else {
2259 Py_INCREF(text);
2260 Py_DECREF(elem);
2261 rc = PyObject_IsTrue(text);
2262 if (rc > 0)
2263 return text;
2264 Py_DECREF(text);
2265 if (rc < 0)
2266 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002267 }
2268 }
2269
2270 return NULL;
2271}
2272
2273
/* Type object for the element iterator: a GC-enabled iterator type with no
   methods, members or constructor of its own. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2317
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002318#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002319
2320static PyObject *
2321create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2322{
2323 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002324
2325 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2326 if (!it)
2327 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002328
Victor Stinner4d463432013-07-11 23:05:03 +02002329 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002330 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002331 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002332 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002333 it->root_element = self;
2334
Eli Bendersky64d11e62012-06-15 07:42:50 +03002335 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002336
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002337 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002338 if (it->parent_stack == NULL) {
2339 Py_DECREF(it);
2340 PyErr_NoMemory();
2341 return NULL;
2342 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002343 it->parent_stack_used = 0;
2344 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002345
Eli Bendersky64d11e62012-06-15 07:42:50 +03002346 return (PyObject *)it;
2347}
2348
2349
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350/* ==================================================================== */
2351/* the tree builder type */
2352
/* Instance layout of the TreeBuilder type. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* optional factory passed to __init__, or NULL */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2375
/* True when op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377
2378/* -------------------------------------------------------------------- */
2379/* constructor and destructor */
2380
Eli Bendersky58d548d2012-05-29 15:45:16 +03002381static PyObject *
2382treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002383{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002384 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2385 if (t != NULL) {
2386 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387
Eli Bendersky58d548d2012-05-29 15:45:16 +03002388 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002389 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002390 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002391 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392
Eli Bendersky58d548d2012-05-29 15:45:16 +03002393 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002394 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002395 t->stack = PyList_New(20);
2396 if (!t->stack) {
2397 Py_DECREF(t->this);
2398 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002399 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002400 return NULL;
2401 }
2402 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002403
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002404 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002405 t->start_event_obj = t->end_event_obj = NULL;
2406 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2407 }
2408 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409}
2410
Serhiy Storchakacb985562015-05-04 15:32:48 +03002411/*[clinic input]
2412_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002413
Serhiy Storchakacb985562015-05-04 15:32:48 +03002414 element_factory: object = NULL
2415
2416[clinic start generated code]*/
2417
2418static int
2419_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2420 PyObject *element_factory)
2421/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2422{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002423 if (element_factory) {
2424 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002425 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002426 }
2427
Eli Bendersky58d548d2012-05-29 15:45:16 +03002428 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429}
2430
Eli Bendersky48d358b2012-05-30 17:57:50 +03002431static int
2432treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2433{
Miss Islington (bot)60c919b2018-12-18 13:40:23 -08002434 Py_VISIT(self->end_ns_event_obj);
2435 Py_VISIT(self->start_ns_event_obj);
2436 Py_VISIT(self->end_event_obj);
2437 Py_VISIT(self->start_event_obj);
2438 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002439 Py_VISIT(self->root);
2440 Py_VISIT(self->this);
2441 Py_VISIT(self->last);
2442 Py_VISIT(self->data);
2443 Py_VISIT(self->stack);
2444 Py_VISIT(self->element_factory);
2445 return 0;
2446}
2447
2448static int
2449treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002450{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002451 Py_CLEAR(self->end_ns_event_obj);
2452 Py_CLEAR(self->start_ns_event_obj);
2453 Py_CLEAR(self->end_event_obj);
2454 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002455 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002456 Py_CLEAR(self->stack);
2457 Py_CLEAR(self->data);
2458 Py_CLEAR(self->last);
2459 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002460 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002461 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002462 return 0;
2463}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002464
Eli Bendersky48d358b2012-05-30 17:57:50 +03002465static void
2466treebuilder_dealloc(TreeBuilderObject *self)
2467{
2468 PyObject_GC_UnTrack(self);
2469 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002470 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471}
2472
2473/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002474/* helpers for handling of arbitrary element-like objects */
2475
2476static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002477treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002478 PyObject **dest, _Py_Identifier *name)
2479{
2480 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002481 PyObject *tmp = JOIN_OBJ(*dest);
2482 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2483 *data = NULL;
2484 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002485 return 0;
2486 }
2487 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002488 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002489 int r;
2490 if (joined == NULL)
2491 return -1;
2492 r = _PyObject_SetAttrId(element, name, joined);
2493 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002494 if (r < 0)
2495 return -1;
2496 Py_CLEAR(*data);
2497 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002498 }
2499}
2500
Serhiy Storchaka576def02017-03-30 09:47:31 +03002501LOCAL(int)
2502treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002503{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002504 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002505
Serhiy Storchaka576def02017-03-30 09:47:31 +03002506 if (!self->data) {
2507 return 0;
2508 }
2509
2510 if (self->this == element) {
2511 _Py_IDENTIFIER(text);
2512 return treebuilder_set_element_text_or_tail(
2513 element, &self->data,
2514 &((ElementObject *) element)->text, &PyId_text);
2515 }
2516 else {
2517 _Py_IDENTIFIER(tail);
2518 return treebuilder_set_element_text_or_tail(
2519 element, &self->data,
2520 &((ElementObject *) element)->tail, &PyId_tail);
2521 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002522}
2523
2524static int
2525treebuilder_add_subelement(PyObject *element, PyObject *child)
2526{
2527 _Py_IDENTIFIER(append);
2528 if (Element_CheckExact(element)) {
2529 ElementObject *elem = (ElementObject *) element;
2530 return element_add_subelement(elem, child);
2531 }
2532 else {
2533 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002534 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002535 if (res == NULL)
2536 return -1;
2537 Py_DECREF(res);
2538 return 0;
2539 }
2540}
2541
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002542LOCAL(int)
2543treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2544 PyObject *node)
2545{
2546 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002547 PyObject *res;
2548 PyObject *event = PyTuple_Pack(2, action, node);
2549 if (event == NULL)
2550 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002551 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002552 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002553 if (res == NULL)
2554 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002555 Py_DECREF(res);
2556 }
2557 return 0;
2558}
2559
Antoine Pitrouee329312012-10-04 19:53:29 +02002560/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561/* handlers */
2562
2563LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2565 PyObject* attrib)
2566{
2567 PyObject* node;
2568 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002569 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570
Serhiy Storchaka576def02017-03-30 09:47:31 +03002571 if (treebuilder_flush_data(self) < 0) {
2572 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 }
2574
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002575 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002576 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002577 } else if (attrib == Py_None) {
2578 attrib = PyDict_New();
2579 if (!attrib)
2580 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002581 node = PyObject_CallFunctionObjArgs(self->element_factory,
2582 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002583 Py_DECREF(attrib);
2584 }
2585 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002586 node = PyObject_CallFunctionObjArgs(self->element_factory,
2587 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002588 }
2589 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002592
Antoine Pitrouee329312012-10-04 19:53:29 +02002593 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594
2595 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002596 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002597 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598 } else {
2599 if (self->root) {
2600 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002601 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602 "multiple elements on top level"
2603 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002604 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002605 }
2606 Py_INCREF(node);
2607 self->root = node;
2608 }
2609
2610 if (self->index < PyList_GET_SIZE(self->stack)) {
2611 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002612 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613 Py_INCREF(this);
2614 } else {
2615 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002616 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 }
2618 self->index++;
2619
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002621 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002623 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002625 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2626 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627
2628 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002629
2630 error:
2631 Py_DECREF(node);
2632 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633}
2634
2635LOCAL(PyObject*)
2636treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2637{
2638 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002639 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002640 /* ignore calls to data before the first call to start */
2641 Py_RETURN_NONE;
2642 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 /* store the first item as is */
2644 Py_INCREF(data); self->data = data;
2645 } else {
2646 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002647 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2648 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002649 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650 /* expat often generates single character data sections; handle
2651 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002652 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2653 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002655 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656 } else if (PyList_CheckExact(self->data)) {
2657 if (PyList_Append(self->data, data) < 0)
2658 return NULL;
2659 } else {
2660 PyObject* list = PyList_New(2);
2661 if (!list)
2662 return NULL;
2663 PyList_SET_ITEM(list, 0, self->data);
2664 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2665 self->data = list;
2666 }
2667 }
2668
2669 Py_RETURN_NONE;
2670}
2671
2672LOCAL(PyObject*)
2673treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2674{
2675 PyObject* item;
2676
Serhiy Storchaka576def02017-03-30 09:47:31 +03002677 if (treebuilder_flush_data(self) < 0) {
2678 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679 }
2680
2681 if (self->index == 0) {
2682 PyErr_SetString(
2683 PyExc_IndexError,
2684 "pop from empty stack"
2685 );
2686 return NULL;
2687 }
2688
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002689 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002690 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002691 self->index--;
2692 self->this = PyList_GET_ITEM(self->stack, self->index);
2693 Py_INCREF(self->this);
2694 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002695
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002696 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2697 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698
2699 Py_INCREF(self->last);
2700 return (PyObject*) self->last;
2701}
2702
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703/* -------------------------------------------------------------------- */
2704/* methods (in alphabetical order) */
2705
Serhiy Storchakacb985562015-05-04 15:32:48 +03002706/*[clinic input]
2707_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708
Serhiy Storchakacb985562015-05-04 15:32:48 +03002709 data: object
2710 /
2711
2712[clinic start generated code]*/
2713
2714static PyObject *
2715_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2716/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2717{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718 return treebuilder_handle_data(self, data);
2719}
2720
Serhiy Storchakacb985562015-05-04 15:32:48 +03002721/*[clinic input]
2722_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723
Serhiy Storchakacb985562015-05-04 15:32:48 +03002724 tag: object
2725 /
2726
2727[clinic start generated code]*/
2728
2729static PyObject *
2730_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2731/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2732{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733 return treebuilder_handle_end(self, tag);
2734}
2735
2736LOCAL(PyObject*)
2737treebuilder_done(TreeBuilderObject* self)
2738{
2739 PyObject* res;
2740
2741 /* FIXME: check stack size? */
2742
2743 if (self->root)
2744 res = self->root;
2745 else
2746 res = Py_None;
2747
2748 Py_INCREF(res);
2749 return res;
2750}
2751
Serhiy Storchakacb985562015-05-04 15:32:48 +03002752/*[clinic input]
2753_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754
Serhiy Storchakacb985562015-05-04 15:32:48 +03002755[clinic start generated code]*/
2756
2757static PyObject *
2758_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2759/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2760{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 return treebuilder_done(self);
2762}
2763
Serhiy Storchakacb985562015-05-04 15:32:48 +03002764/*[clinic input]
2765_elementtree.TreeBuilder.start
2766
2767 tag: object
2768 attrs: object = None
2769 /
2770
2771[clinic start generated code]*/
2772
2773static PyObject *
2774_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2775 PyObject *attrs)
2776/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002778 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779}
2780
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781/* ==================================================================== */
2782/* the expat interface */
2783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002786
2787/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2788 * cached globally without being in per-module state.
2789 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002790static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792
Eli Bendersky52467b12012-06-01 07:13:08 +03002793static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2794 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796typedef struct {
2797 PyObject_HEAD
2798
2799 XML_Parser parser;
2800
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002801 PyObject *target;
2802 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002804 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002806 PyObject *handle_start;
2807 PyObject *handle_data;
2808 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002810 PyObject *handle_comment;
2811 PyObject *handle_pi;
2812 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002814 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002815
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816} XMLParserObject;
2817
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002818static PyObject*
Serhiy Storchakaa5552f02017-12-15 13:11:11 +02002819_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002820static PyObject *
2821_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2822 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002823
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824/* helpers */
2825
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826LOCAL(PyObject*)
2827makeuniversal(XMLParserObject* self, const char* string)
2828{
2829 /* convert a UTF-8 tag/attribute name from the expat parser
2830 to a universal name string */
2831
Antoine Pitrouc1948842012-10-01 23:40:37 +02002832 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002833 PyObject* key;
2834 PyObject* value;
2835
2836 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002837 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002838 if (!key)
2839 return NULL;
2840
2841 value = PyDict_GetItem(self->names, key);
2842
2843 if (value) {
2844 Py_INCREF(value);
2845 } else {
2846 /* new name. convert to universal name, and decode as
2847 necessary */
2848
2849 PyObject* tag;
2850 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002851 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002852
2853 /* look for namespace separator */
2854 for (i = 0; i < size; i++)
2855 if (string[i] == '}')
2856 break;
2857 if (i != size) {
2858 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002859 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002860 if (tag == NULL) {
2861 Py_DECREF(key);
2862 return NULL;
2863 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002864 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865 p[0] = '{';
2866 memcpy(p+1, string, size);
2867 size++;
2868 } else {
2869 /* plain name; use key as tag */
2870 Py_INCREF(key);
2871 tag = key;
2872 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002873
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002874 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002875 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002876 value = PyUnicode_DecodeUTF8(p, size, "strict");
2877 Py_DECREF(tag);
2878 if (!value) {
2879 Py_DECREF(key);
2880 return NULL;
2881 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002882
2883 /* add to names dictionary */
2884 if (PyDict_SetItem(self->names, key, value) < 0) {
2885 Py_DECREF(key);
2886 Py_DECREF(value);
2887 return NULL;
2888 }
2889 }
2890
2891 Py_DECREF(key);
2892 return value;
2893}
2894
Eli Bendersky5b77d812012-03-16 08:20:05 +02002895/* Set the ParseError exception with the given parameters.
2896 * If message is not NULL, it's used as the error string. Otherwise, the
2897 * message string is the default for the given error_code.
2898*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002899static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002900expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2901 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002902{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002903 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002904 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002905
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002906 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002907 message ? message : EXPAT(ErrorString)(error_code),
2908 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002909 if (errmsg == NULL)
2910 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002911
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002912 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002913 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002914 if (!error)
2915 return;
2916
Eli Bendersky5b77d812012-03-16 08:20:05 +02002917 /* Add code and position attributes */
2918 code = PyLong_FromLong((long)error_code);
2919 if (!code) {
2920 Py_DECREF(error);
2921 return;
2922 }
2923 if (PyObject_SetAttrString(error, "code", code) == -1) {
2924 Py_DECREF(error);
2925 Py_DECREF(code);
2926 return;
2927 }
2928 Py_DECREF(code);
2929
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002930 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002931 if (!position) {
2932 Py_DECREF(error);
2933 return;
2934 }
2935 if (PyObject_SetAttrString(error, "position", position) == -1) {
2936 Py_DECREF(error);
2937 Py_DECREF(position);
2938 return;
2939 }
2940 Py_DECREF(position);
2941
Eli Bendersky532d03e2013-08-10 08:00:39 -07002942 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002943 Py_DECREF(error);
2944}
2945
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946/* -------------------------------------------------------------------- */
2947/* handlers */
2948
2949static void
2950expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2951 int data_len)
2952{
2953 PyObject* key;
2954 PyObject* value;
2955 PyObject* res;
2956
2957 if (data_len < 2 || data_in[0] != '&')
2958 return;
2959
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002960 if (PyErr_Occurred())
2961 return;
2962
Neal Norwitz0269b912007-08-08 06:56:02 +00002963 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964 if (!key)
2965 return;
2966
2967 value = PyDict_GetItem(self->entity, key);
2968
2969 if (value) {
2970 if (TreeBuilder_CheckExact(self->target))
2971 res = treebuilder_handle_data(
2972 (TreeBuilderObject*) self->target, value
2973 );
2974 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002975 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 else
2977 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002979 } else if (!PyErr_Occurred()) {
2980 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002981 char message[128] = "undefined entity ";
2982 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002983 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002984 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002986 EXPAT(GetErrorColumnNumber)(self->parser),
2987 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 );
2989 }
2990
2991 Py_DECREF(key);
2992}
2993
2994static void
2995expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2996 const XML_Char **attrib_in)
2997{
2998 PyObject* res;
2999 PyObject* tag;
3000 PyObject* attrib;
3001 int ok;
3002
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003003 if (PyErr_Occurred())
3004 return;
3005
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 /* tag name */
3007 tag = makeuniversal(self, tag_in);
3008 if (!tag)
3009 return; /* parser will look for errors */
3010
3011 /* attributes */
3012 if (attrib_in[0]) {
3013 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003014 if (!attrib) {
3015 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003017 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 while (attrib_in[0] && attrib_in[1]) {
3019 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003020 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021 if (!key || !value) {
3022 Py_XDECREF(value);
3023 Py_XDECREF(key);
3024 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003025 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 return;
3027 }
3028 ok = PyDict_SetItem(attrib, key, value);
3029 Py_DECREF(value);
3030 Py_DECREF(key);
3031 if (ok < 0) {
3032 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003033 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 return;
3035 }
3036 attrib_in += 2;
3037 }
3038 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003039 Py_INCREF(Py_None);
3040 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003041 }
3042
3043 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003044 /* shortcut */
3045 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3046 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003047 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003048 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003049 if (attrib == Py_None) {
3050 Py_DECREF(attrib);
3051 attrib = PyDict_New();
3052 if (!attrib) {
3053 Py_DECREF(tag);
3054 return;
3055 }
3056 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003057 res = PyObject_CallFunctionObjArgs(self->handle_start,
3058 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003059 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 res = NULL;
3061
3062 Py_DECREF(tag);
3063 Py_DECREF(attrib);
3064
3065 Py_XDECREF(res);
3066}
3067
3068static void
3069expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3070 int data_len)
3071{
3072 PyObject* data;
3073 PyObject* res;
3074
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003075 if (PyErr_Occurred())
3076 return;
3077
Neal Norwitz0269b912007-08-08 06:56:02 +00003078 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003079 if (!data)
3080 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003081
3082 if (TreeBuilder_CheckExact(self->target))
3083 /* shortcut */
3084 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3085 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003086 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 else
3088 res = NULL;
3089
3090 Py_DECREF(data);
3091
3092 Py_XDECREF(res);
3093}
3094
3095static void
3096expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3097{
3098 PyObject* tag;
3099 PyObject* res = NULL;
3100
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003101 if (PyErr_Occurred())
3102 return;
3103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 if (TreeBuilder_CheckExact(self->target))
3105 /* shortcut */
3106 /* the standard tree builder doesn't look at the end tag */
3107 res = treebuilder_handle_end(
3108 (TreeBuilderObject*) self->target, Py_None
3109 );
3110 else if (self->handle_end) {
3111 tag = makeuniversal(self, tag_in);
3112 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003113 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114 Py_DECREF(tag);
3115 }
3116 }
3117
3118 Py_XDECREF(res);
3119}
3120
3121static void
3122expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3123 const XML_Char *uri)
3124{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003125 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3126 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003127
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003128 if (PyErr_Occurred())
3129 return;
3130
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003131 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003132 return;
3133
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003134 if (!uri)
3135 uri = "";
3136 if (!prefix)
3137 prefix = "";
3138
3139 parcel = Py_BuildValue("ss", prefix, uri);
3140 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003141 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003142 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3143 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003144}
3145
3146static void
3147expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3148{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003149 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3150
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003151 if (PyErr_Occurred())
3152 return;
3153
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003154 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003155 return;
3156
3157 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003158}
3159
3160static void
3161expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3162{
3163 PyObject* comment;
3164 PyObject* res;
3165
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003166 if (PyErr_Occurred())
3167 return;
3168
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003169 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003170 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003171 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003172 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3173 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174 Py_XDECREF(res);
3175 Py_DECREF(comment);
3176 }
3177 }
3178}
3179
Eli Bendersky45839902013-01-13 05:14:47 -08003180static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003181expat_start_doctype_handler(XMLParserObject *self,
3182 const XML_Char *doctype_name,
3183 const XML_Char *sysid,
3184 const XML_Char *pubid,
3185 int has_internal_subset)
3186{
3187 PyObject *self_pyobj = (PyObject *)self;
3188 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3189 PyObject *parser_doctype = NULL;
3190 PyObject *res = NULL;
3191
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003192 if (PyErr_Occurred())
3193 return;
3194
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003195 doctype_name_obj = makeuniversal(self, doctype_name);
3196 if (!doctype_name_obj)
3197 return;
3198
3199 if (sysid) {
3200 sysid_obj = makeuniversal(self, sysid);
3201 if (!sysid_obj) {
3202 Py_DECREF(doctype_name_obj);
3203 return;
3204 }
3205 } else {
3206 Py_INCREF(Py_None);
3207 sysid_obj = Py_None;
3208 }
3209
3210 if (pubid) {
3211 pubid_obj = makeuniversal(self, pubid);
3212 if (!pubid_obj) {
3213 Py_DECREF(doctype_name_obj);
3214 Py_DECREF(sysid_obj);
3215 return;
3216 }
3217 } else {
3218 Py_INCREF(Py_None);
3219 pubid_obj = Py_None;
3220 }
3221
3222 /* If the target has a handler for doctype, call it. */
3223 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003224 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3225 doctype_name_obj, pubid_obj,
3226 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003227 Py_CLEAR(res);
3228 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003229 else {
3230 /* Now see if the parser itself has a doctype method. If yes and it's
3231 * a custom method, call it but warn about deprecation. If it's only
3232 * the vanilla XMLParser method, do nothing.
3233 */
3234 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3235 if (parser_doctype &&
3236 !(PyCFunction_Check(parser_doctype) &&
3237 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3238 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003239 (PyCFunction) _elementtree_XMLParser_doctype)) {
3240 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3241 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003242 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003243 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003244 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003245 res = PyObject_CallFunctionObjArgs(parser_doctype,
3246 doctype_name_obj, pubid_obj,
3247 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003248 Py_CLEAR(res);
3249 }
3250 }
3251
3252clear:
3253 Py_XDECREF(parser_doctype);
3254 Py_DECREF(doctype_name_obj);
3255 Py_DECREF(pubid_obj);
3256 Py_DECREF(sysid_obj);
3257}
3258
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259static void
3260expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3261 const XML_Char* data_in)
3262{
3263 PyObject* target;
3264 PyObject* data;
3265 PyObject* res;
3266
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003267 if (PyErr_Occurred())
3268 return;
3269
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003271 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3272 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003274 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3275 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 Py_XDECREF(res);
3277 Py_DECREF(data);
3278 Py_DECREF(target);
3279 } else {
3280 Py_XDECREF(data);
3281 Py_XDECREF(target);
3282 }
3283 }
3284}
3285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287
Eli Bendersky52467b12012-06-01 07:13:08 +03003288static PyObject *
3289xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290{
Eli Bendersky52467b12012-06-01 07:13:08 +03003291 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3292 if (self) {
3293 self->parser = NULL;
3294 self->target = self->entity = self->names = NULL;
3295 self->handle_start = self->handle_data = self->handle_end = NULL;
3296 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003297 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003299 return (PyObject *)self;
3300}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301
scoderc8d8e152017-09-14 22:00:03 +02003302static int
3303ignore_attribute_error(PyObject *value)
3304{
3305 if (value == NULL) {
3306 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3307 return -1;
3308 }
3309 PyErr_Clear();
3310 }
3311 return 0;
3312}
3313
Serhiy Storchakacb985562015-05-04 15:32:48 +03003314/*[clinic input]
3315_elementtree.XMLParser.__init__
3316
3317 html: object = NULL
3318 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003319 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320
3321[clinic start generated code]*/
3322
Eli Bendersky52467b12012-06-01 07:13:08 +03003323static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003324_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3325 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003326/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003327{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003328 if (html != NULL) {
3329 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3330 "The html argument of XMLParser() is deprecated",
3331 1) < 0) {
3332 return -1;
3333 }
3334 }
3335
Serhiy Storchakacb985562015-05-04 15:32:48 +03003336 self->entity = PyDict_New();
3337 if (!self->entity)
3338 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339
Serhiy Storchakacb985562015-05-04 15:32:48 +03003340 self->names = PyDict_New();
3341 if (!self->names) {
3342 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003343 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003345
Serhiy Storchakacb985562015-05-04 15:32:48 +03003346 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3347 if (!self->parser) {
3348 Py_CLEAR(self->entity);
3349 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003351 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352 }
Miss Islington (bot)470a4352018-09-18 06:11:09 -07003353 /* expat < 2.1.0 has no XML_SetHashSalt() */
3354 if (EXPAT(SetHashSalt) != NULL) {
3355 EXPAT(SetHashSalt)(self->parser,
3356 (unsigned long)_Py_HashSecret.expat.hashsalt);
3357 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358
Eli Bendersky52467b12012-06-01 07:13:08 +03003359 if (target) {
3360 Py_INCREF(target);
3361 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003362 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003363 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003364 Py_CLEAR(self->entity);
3365 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003366 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003368 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370
Serhiy Storchakacb985562015-05-04 15:32:48 +03003371 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003372 if (ignore_attribute_error(self->handle_start)) {
3373 return -1;
3374 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003375 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003376 if (ignore_attribute_error(self->handle_data)) {
3377 return -1;
3378 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003379 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003380 if (ignore_attribute_error(self->handle_end)) {
3381 return -1;
3382 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003383 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003384 if (ignore_attribute_error(self->handle_comment)) {
3385 return -1;
3386 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003387 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003388 if (ignore_attribute_error(self->handle_pi)) {
3389 return -1;
3390 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003391 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003392 if (ignore_attribute_error(self->handle_close)) {
3393 return -1;
3394 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003395 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003396 if (ignore_attribute_error(self->handle_doctype)) {
3397 return -1;
3398 }
Eli Bendersky45839902013-01-13 05:14:47 -08003399
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003401 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003403 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404 (XML_StartElementHandler) expat_start_handler,
3405 (XML_EndElementHandler) expat_end_handler
3406 );
3407 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003408 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409 (XML_DefaultHandler) expat_default_handler
3410 );
3411 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003412 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 (XML_CharacterDataHandler) expat_data_handler
3414 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003415 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003417 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003418 (XML_CommentHandler) expat_comment_handler
3419 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003420 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003422 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003423 (XML_ProcessingInstructionHandler) expat_pi_handler
3424 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003425 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003426 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003427 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3428 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003429 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003430 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003431 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003432 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433
Eli Bendersky52467b12012-06-01 07:13:08 +03003434 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003435}
3436
Eli Bendersky52467b12012-06-01 07:13:08 +03003437static int
3438xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3439{
3440 Py_VISIT(self->handle_close);
3441 Py_VISIT(self->handle_pi);
3442 Py_VISIT(self->handle_comment);
3443 Py_VISIT(self->handle_end);
3444 Py_VISIT(self->handle_data);
3445 Py_VISIT(self->handle_start);
3446
3447 Py_VISIT(self->target);
3448 Py_VISIT(self->entity);
3449 Py_VISIT(self->names);
3450
3451 return 0;
3452}
3453
3454static int
3455xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456{
Victor Stinnere727d412017-09-18 05:29:37 -07003457 if (self->parser != NULL) {
3458 XML_Parser parser = self->parser;
3459 self->parser = NULL;
3460 EXPAT(ParserFree)(parser);
3461 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003462
Antoine Pitrouc1948842012-10-01 23:40:37 +02003463 Py_CLEAR(self->handle_close);
3464 Py_CLEAR(self->handle_pi);
3465 Py_CLEAR(self->handle_comment);
3466 Py_CLEAR(self->handle_end);
3467 Py_CLEAR(self->handle_data);
3468 Py_CLEAR(self->handle_start);
3469 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003470
Antoine Pitrouc1948842012-10-01 23:40:37 +02003471 Py_CLEAR(self->target);
3472 Py_CLEAR(self->entity);
3473 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474
Eli Bendersky52467b12012-06-01 07:13:08 +03003475 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476}
3477
Eli Bendersky52467b12012-06-01 07:13:08 +03003478static void
3479xmlparser_dealloc(XMLParserObject* self)
3480{
3481 PyObject_GC_UnTrack(self);
3482 xmlparser_gc_clear(self);
3483 Py_TYPE(self)->tp_free((PyObject *)self);
3484}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003485
3486LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003487expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488{
3489 int ok;
3490
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003491 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003492 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3493
3494 if (PyErr_Occurred())
3495 return NULL;
3496
3497 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003498 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003499 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003500 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003501 EXPAT(GetErrorColumnNumber)(self->parser),
3502 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503 );
3504 return NULL;
3505 }
3506
3507 Py_RETURN_NONE;
3508}
3509
Serhiy Storchakacb985562015-05-04 15:32:48 +03003510/*[clinic input]
3511_elementtree.XMLParser.close
3512
3513[clinic start generated code]*/
3514
3515static PyObject *
3516_elementtree_XMLParser_close_impl(XMLParserObject *self)
3517/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003518{
3519 /* end feeding data to parser */
3520
3521 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003522 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003523 if (!res)
3524 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003525
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003526 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527 Py_DECREF(res);
3528 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003529 }
3530 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003531 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003532 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003533 }
3534 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003535 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003536 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003537}
3538
Serhiy Storchakacb985562015-05-04 15:32:48 +03003539/*[clinic input]
3540_elementtree.XMLParser.feed
3541
3542 data: object
3543 /
3544
3545[clinic start generated code]*/
3546
3547static PyObject *
3548_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3549/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550{
3551 /* feed data to parser */
3552
Serhiy Storchakacb985562015-05-04 15:32:48 +03003553 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003554 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003555 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3556 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003557 return NULL;
3558 if (data_len > INT_MAX) {
3559 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3560 return NULL;
3561 }
3562 /* Explicitly set UTF-8 encoding. Return code ignored. */
3563 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003564 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003565 }
3566 else {
3567 Py_buffer view;
3568 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003569 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003570 return NULL;
3571 if (view.len > INT_MAX) {
3572 PyBuffer_Release(&view);
3573 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3574 return NULL;
3575 }
3576 res = expat_parse(self, view.buf, (int)view.len, 0);
3577 PyBuffer_Release(&view);
3578 return res;
3579 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580}
3581
Serhiy Storchakacb985562015-05-04 15:32:48 +03003582/*[clinic input]
3583_elementtree.XMLParser._parse_whole
3584
3585 file: object
3586 /
3587
3588[clinic start generated code]*/
3589
3590static PyObject *
3591_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3592/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593{
Eli Benderskya3699232013-05-19 18:47:23 -07003594 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595 PyObject* reader;
3596 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003597 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598 PyObject* res;
3599
Serhiy Storchakacb985562015-05-04 15:32:48 +03003600 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003601 if (!reader)
3602 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003603
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604 /* read from open file object */
3605 for (;;) {
3606
3607 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3608
3609 if (!buffer) {
3610 /* read failed (e.g. due to KeyboardInterrupt) */
3611 Py_DECREF(reader);
3612 return NULL;
3613 }
3614
Eli Benderskyf996e772012-03-16 05:53:30 +02003615 if (PyUnicode_CheckExact(buffer)) {
3616 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003617 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003618 Py_DECREF(buffer);
3619 break;
3620 }
3621 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003622 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003623 if (!temp) {
3624 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003625 Py_DECREF(reader);
3626 return NULL;
3627 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003628 buffer = temp;
3629 }
3630 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 Py_DECREF(buffer);
3632 break;
3633 }
3634
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003635 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3636 Py_DECREF(buffer);
3637 Py_DECREF(reader);
3638 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3639 return NULL;
3640 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003642 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003643 );
3644
3645 Py_DECREF(buffer);
3646
3647 if (!res) {
3648 Py_DECREF(reader);
3649 return NULL;
3650 }
3651 Py_DECREF(res);
3652
3653 }
3654
3655 Py_DECREF(reader);
3656
3657 res = expat_parse(self, "", 0, 1);
3658
3659 if (res && TreeBuilder_CheckExact(self->target)) {
3660 Py_DECREF(res);
3661 return treebuilder_done((TreeBuilderObject*) self->target);
3662 }
3663
3664 return res;
3665}
3666
Serhiy Storchakacb985562015-05-04 15:32:48 +03003667/*[clinic input]
3668_elementtree.XMLParser.doctype
3669
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003670 name: object
3671 pubid: object
3672 system: object
3673 /
3674
Serhiy Storchakacb985562015-05-04 15:32:48 +03003675[clinic start generated code]*/
3676
3677static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003678_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3679 PyObject *pubid, PyObject *system)
3680/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003681{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003682 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3683 "This method of XMLParser is deprecated. Define"
3684 " doctype() method on the TreeBuilder target.",
3685 1) < 0) {
3686 return NULL;
3687 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003688 Py_RETURN_NONE;
3689}
3690
Serhiy Storchakacb985562015-05-04 15:32:48 +03003691/*[clinic input]
3692_elementtree.XMLParser._setevents
3693
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003694 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003695 events_to_report: object = None
3696 /
3697
3698[clinic start generated code]*/
3699
3700static PyObject *
3701_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3702 PyObject *events_queue,
3703 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003704/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003705{
3706 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003707 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003708 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003709 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003710
3711 if (!TreeBuilder_CheckExact(self->target)) {
3712 PyErr_SetString(
3713 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003714 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003715 "targets"
3716 );
3717 return NULL;
3718 }
3719
3720 target = (TreeBuilderObject*) self->target;
3721
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003722 events_append = PyObject_GetAttrString(events_queue, "append");
3723 if (events_append == NULL)
3724 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003725 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003726
3727 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003728 Py_CLEAR(target->start_event_obj);
3729 Py_CLEAR(target->end_event_obj);
3730 Py_CLEAR(target->start_ns_event_obj);
3731 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003732
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003733 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003735 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003736 Py_RETURN_NONE;
3737 }
3738
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003739 if (!(events_seq = PySequence_Fast(events_to_report,
3740 "events must be a sequence"))) {
3741 return NULL;
3742 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003743
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003744 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003745 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003746 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003747 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003748 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003749 } else if (PyBytes_Check(event_name_obj)) {
3750 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003751 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003752 if (event_name == NULL) {
3753 Py_DECREF(events_seq);
3754 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3755 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003756 }
3757
3758 Py_INCREF(event_name_obj);
3759 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003760 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003761 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003762 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003763 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003764 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003765 EXPAT(SetNamespaceDeclHandler)(
3766 self->parser,
3767 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3768 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3769 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003770 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003771 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003772 EXPAT(SetNamespaceDeclHandler)(
3773 self->parser,
3774 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3775 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3776 );
3777 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003778 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003779 Py_DECREF(events_seq);
3780 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003781 return NULL;
3782 }
3783 }
3784
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003785 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003786 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003787}
3788
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003789static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003790xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003791{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003792 if (PyUnicode_Check(nameobj)) {
3793 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003794 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003795 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003796 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003797 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003798 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003799 return PyUnicode_FromFormat(
3800 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003801 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003802 }
3803 else
3804 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003805
Alexander Belopolskye239d232010-12-08 23:31:48 +00003806 Py_INCREF(res);
3807 return res;
3808 }
3809 generic:
3810 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003811}
3812
Serhiy Storchakacb985562015-05-04 15:32:48 +03003813#include "clinic/_elementtree.c.h"
3814
3815static PyMethodDef element_methods[] = {
3816
3817 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3818
3819 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3820 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3821
3822 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3823 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3824 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3825
3826 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3827 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3828 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3829 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3830
3831 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3832 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3833 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3834
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003835 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003836 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3837
3838 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3839 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3840
3841 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3842
3843 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3844 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3845 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3846 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3847 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3848
3849 {NULL, NULL}
3850};
3851
3852static PyMappingMethods element_as_mapping = {
3853 (lenfunc) element_length,
3854 (binaryfunc) element_subscr,
3855 (objobjargproc) element_ass_subscr,
3856};
3857
Serhiy Storchakadde08152015-11-25 15:28:13 +02003858static PyGetSetDef element_getsetlist[] = {
3859 {"tag",
3860 (getter)element_tag_getter,
3861 (setter)element_tag_setter,
3862 "A string identifying what kind of data this element represents"},
3863 {"text",
3864 (getter)element_text_getter,
3865 (setter)element_text_setter,
3866 "A string of text directly after the start tag, or None"},
3867 {"tail",
3868 (getter)element_tail_getter,
3869 (setter)element_tail_setter,
3870 "A string of text directly after the end tag, or None"},
3871 {"attrib",
3872 (getter)element_attrib_getter,
3873 (setter)element_attrib_setter,
3874 "A dictionary containing the element's attributes"},
3875 {NULL},
3876};
3877
Serhiy Storchakacb985562015-05-04 15:32:48 +03003878static PyTypeObject Element_Type = {
3879 PyVarObject_HEAD_INIT(NULL, 0)
3880 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3881 /* methods */
3882 (destructor)element_dealloc, /* tp_dealloc */
3883 0, /* tp_print */
3884 0, /* tp_getattr */
3885 0, /* tp_setattr */
3886 0, /* tp_reserved */
3887 (reprfunc)element_repr, /* tp_repr */
3888 0, /* tp_as_number */
3889 &element_as_sequence, /* tp_as_sequence */
3890 &element_as_mapping, /* tp_as_mapping */
3891 0, /* tp_hash */
3892 0, /* tp_call */
3893 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003894 PyObject_GenericGetAttr, /* tp_getattro */
3895 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003896 0, /* tp_as_buffer */
3897 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3898 /* tp_flags */
3899 0, /* tp_doc */
3900 (traverseproc)element_gc_traverse, /* tp_traverse */
3901 (inquiry)element_gc_clear, /* tp_clear */
3902 0, /* tp_richcompare */
3903 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3904 0, /* tp_iter */
3905 0, /* tp_iternext */
3906 element_methods, /* tp_methods */
3907 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003908 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003909 0, /* tp_base */
3910 0, /* tp_dict */
3911 0, /* tp_descr_get */
3912 0, /* tp_descr_set */
3913 0, /* tp_dictoffset */
3914 (initproc)element_init, /* tp_init */
3915 PyType_GenericAlloc, /* tp_alloc */
3916 element_new, /* tp_new */
3917 0, /* tp_free */
3918};
3919
3920static PyMethodDef treebuilder_methods[] = {
3921 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3922 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3923 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3924 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3925 {NULL, NULL}
3926};
3927
3928static PyTypeObject TreeBuilder_Type = {
3929 PyVarObject_HEAD_INIT(NULL, 0)
3930 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3931 /* methods */
3932 (destructor)treebuilder_dealloc, /* tp_dealloc */
3933 0, /* tp_print */
3934 0, /* tp_getattr */
3935 0, /* tp_setattr */
3936 0, /* tp_reserved */
3937 0, /* tp_repr */
3938 0, /* tp_as_number */
3939 0, /* tp_as_sequence */
3940 0, /* tp_as_mapping */
3941 0, /* tp_hash */
3942 0, /* tp_call */
3943 0, /* tp_str */
3944 0, /* tp_getattro */
3945 0, /* tp_setattro */
3946 0, /* tp_as_buffer */
3947 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3948 /* tp_flags */
3949 0, /* tp_doc */
3950 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3951 (inquiry)treebuilder_gc_clear, /* tp_clear */
3952 0, /* tp_richcompare */
3953 0, /* tp_weaklistoffset */
3954 0, /* tp_iter */
3955 0, /* tp_iternext */
3956 treebuilder_methods, /* tp_methods */
3957 0, /* tp_members */
3958 0, /* tp_getset */
3959 0, /* tp_base */
3960 0, /* tp_dict */
3961 0, /* tp_descr_get */
3962 0, /* tp_descr_set */
3963 0, /* tp_dictoffset */
3964 _elementtree_TreeBuilder___init__, /* tp_init */
3965 PyType_GenericAlloc, /* tp_alloc */
3966 treebuilder_new, /* tp_new */
3967 0, /* tp_free */
3968};
3969
3970static PyMethodDef xmlparser_methods[] = {
3971 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3972 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3973 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3974 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3975 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3976 {NULL, NULL}
3977};
3978
Neal Norwitz227b5332006-03-22 09:28:35 +00003979static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003980 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003981 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003982 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003983 (destructor)xmlparser_dealloc, /* tp_dealloc */
3984 0, /* tp_print */
3985 0, /* tp_getattr */
3986 0, /* tp_setattr */
3987 0, /* tp_reserved */
3988 0, /* tp_repr */
3989 0, /* tp_as_number */
3990 0, /* tp_as_sequence */
3991 0, /* tp_as_mapping */
3992 0, /* tp_hash */
3993 0, /* tp_call */
3994 0, /* tp_str */
3995 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3996 0, /* tp_setattro */
3997 0, /* tp_as_buffer */
3998 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3999 /* tp_flags */
4000 0, /* tp_doc */
4001 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4002 (inquiry)xmlparser_gc_clear, /* tp_clear */
4003 0, /* tp_richcompare */
4004 0, /* tp_weaklistoffset */
4005 0, /* tp_iter */
4006 0, /* tp_iternext */
4007 xmlparser_methods, /* tp_methods */
4008 0, /* tp_members */
4009 0, /* tp_getset */
4010 0, /* tp_base */
4011 0, /* tp_dict */
4012 0, /* tp_descr_get */
4013 0, /* tp_descr_set */
4014 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004015 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004016 PyType_GenericAlloc, /* tp_alloc */
4017 xmlparser_new, /* tp_new */
4018 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004019};
4020
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004021/* ==================================================================== */
4022/* python module interface */
4023
4024static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08004025 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004026 {NULL, NULL}
4027};
4028
Martin v. Löwis1a214512008-06-11 05:26:20 +00004029
Eli Bendersky532d03e2013-08-10 08:00:39 -07004030static struct PyModuleDef elementtreemodule = {
4031 PyModuleDef_HEAD_INIT,
4032 "_elementtree",
4033 NULL,
4034 sizeof(elementtreestate),
4035 _functions,
4036 NULL,
4037 elementtree_traverse,
4038 elementtree_clear,
4039 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004040};
4041
Neal Norwitzf6657e62006-12-28 04:47:50 +00004042PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004043PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004044{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004045 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004046 elementtreestate *st;
4047
4048 m = PyState_FindModule(&elementtreemodule);
4049 if (m) {
4050 Py_INCREF(m);
4051 return m;
4052 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004053
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004054 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004055 if (PyType_Ready(&ElementIter_Type) < 0)
4056 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004057 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004058 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004059 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004060 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004061 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004062 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004063
Eli Bendersky532d03e2013-08-10 08:00:39 -07004064 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004065 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004066 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004067 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004068
Eli Bendersky828efde2012-04-05 05:40:58 +03004069 if (!(temp = PyImport_ImportModule("copy")))
4070 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004071 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004072 Py_XDECREF(temp);
4073
Victor Stinnerb136f112017-07-10 22:28:02 +02004074 if (st->deepcopy_obj == NULL) {
4075 return NULL;
4076 }
4077
4078 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004079 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004080 return NULL;
4081
Eli Bendersky20d41742012-06-01 09:48:37 +03004082 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004083 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4084 if (expat_capi) {
4085 /* check that it's usable */
4086 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004087 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004088 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4089 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004090 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004091 PyErr_SetString(PyExc_ImportError,
4092 "pyexpat version is incompatible");
4093 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004094 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004095 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004096 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004097 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004098
Eli Bendersky532d03e2013-08-10 08:00:39 -07004099 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004100 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004101 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004102 Py_INCREF(st->parseerror_obj);
4103 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004104
Eli Bendersky092af1f2012-03-04 07:14:03 +02004105 Py_INCREF((PyObject *)&Element_Type);
4106 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4107
Eli Bendersky58d548d2012-05-29 15:45:16 +03004108 Py_INCREF((PyObject *)&TreeBuilder_Type);
4109 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4110
Eli Bendersky52467b12012-06-01 07:13:08 +03004111 Py_INCREF((PyObject *)&XMLParser_Type);
4112 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004113
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004114 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004115}