blob: 79f1ccd68565745e8b378dbdcb8baf2ba813dea6 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
38#if 0
39static int memory = 0;
40#define ALLOC(size, comment)\
41do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
42#define RELEASE(size, comment)\
43do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
44#else
45#define ALLOC(size, comment)
46#define RELEASE(size, comment)
47#endif
48
49/* compiler tweaks */
50#if defined(_MSC_VER)
51#define LOCAL(type) static __inline type __fastcall
52#else
53#define LOCAL(type) static type
54#endif
55
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000056/* macros used to store 'join' flags in string object pointers. note
57 that all use of text and tail as object pointers must be wrapped in
58 JOIN_OBJ. see comments in the ElementObject definition for more
59 info. */
Benjamin Petersonca470632016-09-06 13:47:26 -070060#define JOIN_GET(p) ((uintptr_t) (p) & 1)
61#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
62#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
Christian Heimes90aa7642007-12-19 02:45:37 +0000206#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Miss Islington (bot)b1c80032018-10-14 00:55:49 -0700207#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700339 if (attrib_str == NULL) {
340 return NULL;
341 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
Miss Islington (bot)62674f32018-12-10 23:05:13 -0800355 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
356 Py_DECREF(attrib);
357 attrib = NULL;
358 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300359 } else {
360 attrib = PyDict_New();
361 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700362
363 Py_DECREF(attrib_str);
364
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700365 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
366 Py_DECREF(attrib);
367 return NULL;
368 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300369 return attrib;
370}
371
Serhiy Storchakacb985562015-05-04 15:32:48 +0300372/*[clinic input]
373module _elementtree
374class _elementtree.Element "ElementObject *" "&Element_Type"
375class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
376class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
377[clinic start generated code]*/
378/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
379
Eli Bendersky092af1f2012-03-04 07:14:03 +0200380static int
381element_init(PyObject *self, PyObject *args, PyObject *kwds)
382{
383 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 PyObject *attrib = NULL;
385 ElementObject *self_elem;
386
387 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
388 return -1;
389
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 if (attrib) {
391 /* attrib passed as positional arg */
392 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200393 if (!attrib)
394 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 if (kwds) {
396 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200397 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300398 return -1;
399 }
400 }
401 } else if (kwds) {
402 /* have keywords args */
403 attrib = get_attrib_from_keywords(kwds);
404 if (!attrib)
405 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 }
407
408 self_elem = (ElementObject *)self;
409
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 return -1;
414 }
415 }
416
Eli Bendersky48d358b2012-05-30 17:57:50 +0300417 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200418 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419
420 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300422 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300425 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
Eli Bendersky092af1f2012-03-04 07:14:03 +0200427 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300428 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429
430 return 0;
431}
432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200434element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200436 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 PyObject* *children;
438
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700439 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 /* make sure self->children can hold the given number of extra
441 elements. set an exception and return -1 if allocation failed */
442
Victor Stinner5f0af232013-07-11 23:01:36 +0200443 if (!self->extra) {
444 if (create_extra(self, NULL) < 0)
445 return -1;
446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200448 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449
450 if (size > self->extra->allocated) {
451 /* use Python 2.4's list growth strategy */
452 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100454 * which needs at least 4 bytes.
455 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000456 * be safe.
457 */
458 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200459 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
460 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000461 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000462 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100463 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 * false alarm always assume at least one child to be safe.
465 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000466 children = PyObject_Realloc(self->extra->children,
467 size * sizeof(PyObject*));
468 if (!children)
469 goto nomemory;
470 } else {
471 children = PyObject_Malloc(size * sizeof(PyObject*));
472 if (!children)
473 goto nomemory;
474 /* copy existing children from static area to malloc buffer */
475 memcpy(children, self->extra->children,
476 self->extra->length * sizeof(PyObject*));
477 }
478 self->extra->children = children;
479 self->extra->allocated = size;
480 }
481
482 return 0;
483
484 nomemory:
485 PyErr_NoMemory();
486 return -1;
487}
488
489LOCAL(int)
490element_add_subelement(ElementObject* self, PyObject* element)
491{
492 /* add a child element to a parent */
493
494 if (element_resize(self, 1) < 0)
495 return -1;
496
497 Py_INCREF(element);
498 self->extra->children[self->extra->length] = element;
499
500 self->extra->length++;
501
502 return 0;
503}
504
505LOCAL(PyObject*)
506element_get_attrib(ElementObject* self)
507{
508 /* return borrowed reference to attrib dictionary */
509 /* note: this function assumes that the extra section exists */
510
511 PyObject* res = self->extra->attrib;
512
513 if (res == Py_None) {
514 /* create missing dictionary */
515 res = PyDict_New();
516 if (!res)
517 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200518 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000519 self->extra->attrib = res;
520 }
521
522 return res;
523}
524
525LOCAL(PyObject*)
526element_get_text(ElementObject* self)
527{
528 /* return borrowed reference to text attribute */
529
Serhiy Storchaka576def02017-03-30 09:47:31 +0300530 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000531
532 if (JOIN_GET(res)) {
533 res = JOIN_OBJ(res);
534 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300535 PyObject *tmp = list_join(res);
536 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000537 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300538 self->text = tmp;
539 Py_DECREF(res);
540 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000541 }
542 }
543
544 return res;
545}
546
547LOCAL(PyObject*)
548element_get_tail(ElementObject* self)
549{
550 /* return borrowed reference to text attribute */
551
Serhiy Storchaka576def02017-03-30 09:47:31 +0300552 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000553
554 if (JOIN_GET(res)) {
555 res = JOIN_OBJ(res);
556 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300557 PyObject *tmp = list_join(res);
558 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300560 self->tail = tmp;
561 Py_DECREF(res);
562 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 }
564 }
565
566 return res;
567}
568
569static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300570subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000571{
572 PyObject* elem;
573
574 ElementObject* parent;
575 PyObject* tag;
576 PyObject* attrib = NULL;
577 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
578 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800579 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000580 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800581 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582
Eli Bendersky737b1732012-05-29 06:02:56 +0300583 if (attrib) {
584 /* attrib passed as positional arg */
585 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000586 if (!attrib)
587 return NULL;
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700588 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
589 Py_DECREF(attrib);
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 }
592 } else if (kwds) {
593 /* have keyword args */
594 attrib = get_attrib_from_keywords(kwds);
595 if (!attrib)
596 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000597 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300598 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000599 Py_INCREF(Py_None);
600 attrib = Py_None;
601 }
602
Eli Bendersky092af1f2012-03-04 07:14:03 +0200603 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200605 if (elem == NULL)
606 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000608 if (element_add_subelement(parent, elem) < 0) {
609 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000610 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000611 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000612
613 return elem;
614}
615
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616static int
617element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
618{
619 Py_VISIT(self->tag);
620 Py_VISIT(JOIN_OBJ(self->text));
621 Py_VISIT(JOIN_OBJ(self->tail));
622
623 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200624 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300625 Py_VISIT(self->extra->attrib);
626
627 for (i = 0; i < self->extra->length; ++i)
628 Py_VISIT(self->extra->children[i]);
629 }
630 return 0;
631}
632
633static int
634element_gc_clear(ElementObject *self)
635{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700637 _clear_joined_ptr(&self->text);
638 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639
640 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300641 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300642 */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700643 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300644 return 0;
645}
646
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647static void
648element_dealloc(ElementObject* self)
649{
INADA Naokia6296d32017-08-24 14:55:17 +0900650 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300651 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200652 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300653
654 if (self->weakreflist != NULL)
655 PyObject_ClearWeakRefs((PyObject *) self);
656
Eli Bendersky0192ba32012-03-30 16:38:33 +0300657 /* element_gc_clear clears all references and deallocates extra
658 */
659 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660
661 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200662 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200663 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000664}
665
666/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668/*[clinic input]
669_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670
Serhiy Storchakacb985562015-05-04 15:32:48 +0300671 subelement: object(subclass_of='&Element_Type')
672 /
673
674[clinic start generated code]*/
675
676static PyObject *
677_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
678/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
679{
680 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000681 return NULL;
682
683 Py_RETURN_NONE;
684}
685
Serhiy Storchakacb985562015-05-04 15:32:48 +0300686/*[clinic input]
687_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000688
Serhiy Storchakacb985562015-05-04 15:32:48 +0300689[clinic start generated code]*/
690
691static PyObject *
692_elementtree_Element_clear_impl(ElementObject *self)
693/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
694{
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700695 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696
697 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300698 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300701 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702
703 Py_RETURN_NONE;
704}
705
Serhiy Storchakacb985562015-05-04 15:32:48 +0300706/*[clinic input]
707_elementtree.Element.__copy__
708
709[clinic start generated code]*/
710
711static PyObject *
712_elementtree_Element___copy___impl(ElementObject *self)
713/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000714{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200715 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 ElementObject* element;
717
Eli Bendersky092af1f2012-03-04 07:14:03 +0200718 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800719 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720 if (!element)
721 return NULL;
722
Oren Milman39ecb9c2017-10-10 23:26:24 +0300723 Py_INCREF(JOIN_OBJ(self->text));
724 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725
Oren Milman39ecb9c2017-10-10 23:26:24 +0300726 Py_INCREF(JOIN_OBJ(self->tail));
727 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000728
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700729 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000731 if (element_resize(element, self->extra->length) < 0) {
732 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000733 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000734 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000735
736 for (i = 0; i < self->extra->length; i++) {
737 Py_INCREF(self->extra->children[i]);
738 element->extra->children[i] = self->extra->children[i];
739 }
740
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700741 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000742 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 }
744
745 return (PyObject*) element;
746}
747
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200748/* Helper for a deep copy. */
749LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
Oren Milmand0568182017-09-12 17:39:15 +0300754 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300755 /
756
757[clinic start generated code]*/
758
759static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300760_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300797 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000798
799 tail = deepcopy(JOIN_OBJ(self->tail), memo);
800 if (!tail)
801 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300802 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700804 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806 if (element_resize(element, self->extra->length) < 0)
807 goto error;
808
809 for (i = 0; i < self->extra->length; i++) {
810 PyObject* child = deepcopy(self->extra->children[i], memo);
811 if (!child) {
812 element->extra->length = i;
813 goto error;
814 }
815 element->extra->children[i] = child;
816 }
817
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700818 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700823 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200841LOCAL(PyObject *)
842deepcopy(PyObject *object, PyObject *memo)
843{
844 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200845 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200846 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200847
848 /* Fast paths */
849 if (object == Py_None || PyUnicode_CheckExact(object)) {
850 Py_INCREF(object);
851 return object;
852 }
853
854 if (Py_REFCNT(object) == 1) {
855 if (PyDict_CheckExact(object)) {
856 PyObject *key, *value;
857 Py_ssize_t pos = 0;
858 int simple = 1;
859 while (PyDict_Next(object, &pos, &key, &value)) {
860 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
861 simple = 0;
862 break;
863 }
864 }
865 if (simple)
866 return PyDict_Copy(object);
867 /* Fall through to general case */
868 }
869 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300870 return _elementtree_Element___deepcopy___impl(
871 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200872 }
873 }
874
875 /* General case */
876 st = ET_STATE_GLOBAL;
877 if (!st->deepcopy_obj) {
878 PyErr_SetString(PyExc_RuntimeError,
879 "deepcopy helper not found");
880 return NULL;
881 }
882
Victor Stinner7fbac452016-08-20 01:34:44 +0200883 stack[0] = object;
884 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200885 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200886}
887
888
Serhiy Storchakacb985562015-05-04 15:32:48 +0300889/*[clinic input]
890_elementtree.Element.__sizeof__ -> Py_ssize_t
891
892[clinic start generated code]*/
893
894static Py_ssize_t
895_elementtree_Element___sizeof___impl(ElementObject *self)
896/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200897{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200898 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200899 if (self->extra) {
900 result += sizeof(ElementObjectExtra);
901 if (self->extra->children != self->extra->_children)
902 result += sizeof(PyObject*) * self->extra->allocated;
903 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300904 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200905}
906
Eli Bendersky698bdb22013-01-10 06:01:06 -0800907/* dict keys for getstate/setstate. */
908#define PICKLED_TAG "tag"
909#define PICKLED_CHILDREN "_children"
910#define PICKLED_ATTRIB "attrib"
911#define PICKLED_TAIL "tail"
912#define PICKLED_TEXT "text"
913
914/* __getstate__ returns a fabricated instance dict as in the pure-Python
915 * Element implementation, for interoperability/interchangeability. This
916 * makes the pure-Python implementation details an API, but (a) there aren't
917 * any unnecessary structures there; and (b) it buys compatibility with 3.2
918 * pickles. See issue #16076.
919 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300920/*[clinic input]
921_elementtree.Element.__getstate__
922
923[clinic start generated code]*/
924
Eli Bendersky698bdb22013-01-10 06:01:06 -0800925static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300926_elementtree_Element___getstate___impl(ElementObject *self)
927/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800928{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200929 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800930 PyObject *instancedict = NULL, *children;
931
932 /* Build a list of children. */
933 children = PyList_New(self->extra ? self->extra->length : 0);
934 if (!children)
935 return NULL;
936 for (i = 0; i < PyList_GET_SIZE(children); i++) {
937 PyObject *child = self->extra->children[i];
938 Py_INCREF(child);
939 PyList_SET_ITEM(children, i, child);
940 }
941
942 /* Construct the state object. */
943 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
944 if (noattrib)
945 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
946 PICKLED_TAG, self->tag,
947 PICKLED_CHILDREN, children,
948 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700949 PICKLED_TEXT, JOIN_OBJ(self->text),
950 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800951 else
952 instancedict = Py_BuildValue("{sOsOsOsOsO}",
953 PICKLED_TAG, self->tag,
954 PICKLED_CHILDREN, children,
955 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700956 PICKLED_TEXT, JOIN_OBJ(self->text),
957 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800958 if (instancedict) {
959 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800960 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800961 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800962 else {
963 for (i = 0; i < PyList_GET_SIZE(children); i++)
964 Py_DECREF(PyList_GET_ITEM(children, i));
965 Py_DECREF(children);
966
967 return NULL;
968 }
969}
970
971static PyObject *
972element_setstate_from_attributes(ElementObject *self,
973 PyObject *tag,
974 PyObject *attrib,
975 PyObject *text,
976 PyObject *tail,
977 PyObject *children)
978{
979 Py_ssize_t i, nchildren;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700980 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800981
982 if (!tag) {
983 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
984 return NULL;
985 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200987 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300988 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800989
Oren Milman39ecb9c2017-10-10 23:26:24 +0300990 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
991 Py_INCREF(JOIN_OBJ(text));
992 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800993
Oren Milman39ecb9c2017-10-10 23:26:24 +0300994 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
995 Py_INCREF(JOIN_OBJ(tail));
996 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800997
998 /* Handle ATTRIB and CHILDREN. */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700999 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001000 Py_RETURN_NONE;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001001 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001002
1003 /* Compute 'nchildren'. */
1004 if (children) {
1005 if (!PyList_Check(children)) {
1006 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1007 return NULL;
1008 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001009 nchildren = PyList_GET_SIZE(children);
1010
1011 /* (Re-)allocate 'extra'.
1012 Avoid DECREFs calling into this code again (cycles, etc.)
1013 */
1014 oldextra = self->extra;
1015 self->extra = NULL;
1016 if (element_resize(self, nchildren)) {
1017 assert(!self->extra || !self->extra->length);
1018 clear_extra(self);
1019 self->extra = oldextra;
1020 return NULL;
1021 }
1022 assert(self->extra);
1023 assert(self->extra->allocated >= nchildren);
1024 if (oldextra) {
1025 assert(self->extra->attrib == Py_None);
1026 self->extra->attrib = oldextra->attrib;
1027 oldextra->attrib = Py_None;
1028 }
1029
1030 /* Copy children */
1031 for (i = 0; i < nchildren; i++) {
1032 self->extra->children[i] = PyList_GET_ITEM(children, i);
1033 Py_INCREF(self->extra->children[i]);
1034 }
1035
1036 assert(!self->extra->length);
1037 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001038 }
1039 else {
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001040 if (element_resize(self, 0)) {
1041 return NULL;
1042 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001043 }
1044
Eli Bendersky698bdb22013-01-10 06:01:06 -08001045 /* Stash attrib. */
1046 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001047 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001048 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001049 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001050 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001051
1052 Py_RETURN_NONE;
1053}
1054
1055/* __setstate__ for Element instance from the Python implementation.
1056 * 'state' should be the instance dict.
1057 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001058
Eli Bendersky698bdb22013-01-10 06:01:06 -08001059static PyObject *
1060element_setstate_from_Python(ElementObject *self, PyObject *state)
1061{
1062 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1063 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1064 PyObject *args;
1065 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001066 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001067
Eli Bendersky698bdb22013-01-10 06:01:06 -08001068 tag = attrib = text = tail = children = NULL;
1069 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001070 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001071 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001072
1073 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1074 &attrib, &text, &tail, &children))
1075 retval = element_setstate_from_attributes(self, tag, attrib, text,
1076 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001077 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001078 retval = NULL;
1079
1080 Py_DECREF(args);
1081 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.__setstate__
1086
1087 state: object
1088 /
1089
1090[clinic start generated code]*/
1091
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001093_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1094/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001095{
1096 if (!PyDict_CheckExact(state)) {
1097 PyErr_Format(PyExc_TypeError,
1098 "Don't know how to unpickle \"%.200R\" as an Element",
1099 state);
1100 return NULL;
1101 }
1102 else
1103 return element_setstate_from_Python(self, state);
1104}
1105
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001106LOCAL(int)
1107checkpath(PyObject* tag)
1108{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001109 Py_ssize_t i;
1110 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111
1112 /* check if a tag contains an xpath character */
1113
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001114#define PATHCHAR(ch) \
1115 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1119 void *data = PyUnicode_DATA(tag);
1120 unsigned int kind = PyUnicode_KIND(tag);
1121 for (i = 0; i < len; i++) {
1122 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1123 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001124 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001126 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001128 return 1;
1129 }
1130 return 0;
1131 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001132 if (PyBytes_Check(tag)) {
1133 char *p = PyBytes_AS_STRING(tag);
1134 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001135 if (p[i] == '{')
1136 check = 0;
1137 else if (p[i] == '}')
1138 check = 1;
1139 else if (check && PATHCHAR(p[i]))
1140 return 1;
1141 }
1142 return 0;
1143 }
1144
1145 return 1; /* unknown type; might be path expression */
1146}
1147
Serhiy Storchakacb985562015-05-04 15:32:48 +03001148/*[clinic input]
1149_elementtree.Element.extend
1150
1151 elements: object
1152 /
1153
1154[clinic start generated code]*/
1155
1156static PyObject *
1157_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1158/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159{
1160 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001161 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001162
Serhiy Storchakacb985562015-05-04 15:32:48 +03001163 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001164 if (!seq) {
1165 PyErr_Format(
1166 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001167 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001168 );
1169 return NULL;
1170 }
1171
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001172 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001173 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001174 Py_INCREF(element);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001175 if (!Element_Check(element)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001176 PyErr_Format(
1177 PyExc_TypeError,
1178 "expected an Element, not \"%.200s\"",
1179 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001180 Py_DECREF(seq);
1181 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001182 return NULL;
1183 }
1184
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001185 if (element_add_subelement(self, element) < 0) {
1186 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001187 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188 return NULL;
1189 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001191 }
1192
1193 Py_DECREF(seq);
1194
1195 Py_RETURN_NONE;
1196}
1197
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198/*[clinic input]
1199_elementtree.Element.find
1200
1201 path: object
1202 namespaces: object = None
1203
1204[clinic start generated code]*/
1205
1206static PyObject *
1207_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1208 PyObject *namespaces)
1209/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001210{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001211 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001212 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001213
Serhiy Storchakacb985562015-05-04 15:32:48 +03001214 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001215 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001216 return _PyObject_CallMethodIdObjArgs(
1217 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001219 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001220
1221 if (!self->extra)
1222 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 for (i = 0; i < self->extra->length; i++) {
1225 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001226 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001227 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001228 continue;
1229 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001230 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001231 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001232 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001233 Py_DECREF(item);
1234 if (rc < 0)
1235 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 }
1237
1238 Py_RETURN_NONE;
1239}
1240
Serhiy Storchakacb985562015-05-04 15:32:48 +03001241/*[clinic input]
1242_elementtree.Element.findtext
1243
1244 path: object
1245 default: object = None
1246 namespaces: object = None
1247
1248[clinic start generated code]*/
1249
1250static PyObject *
1251_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1252 PyObject *default_value,
1253 PyObject *namespaces)
1254/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001255{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001256 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001257 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001258 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001259
Serhiy Storchakacb985562015-05-04 15:32:48 +03001260 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001261 return _PyObject_CallMethodIdObjArgs(
1262 st->elementpath_obj, &PyId_findtext,
1263 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001264 );
1265
1266 if (!self->extra) {
1267 Py_INCREF(default_value);
1268 return default_value;
1269 }
1270
1271 for (i = 0; i < self->extra->length; i++) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001272 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001273 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001274 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001275 continue;
1276 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001277 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001278 if (rc > 0) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001279 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001280 if (text == Py_None) {
1281 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001282 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001283 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001284 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001285 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286 return text;
1287 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001288 Py_DECREF(item);
1289 if (rc < 0)
1290 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001291 }
1292
1293 Py_INCREF(default_value);
1294 return default_value;
1295}
1296
Serhiy Storchakacb985562015-05-04 15:32:48 +03001297/*[clinic input]
1298_elementtree.Element.findall
1299
1300 path: object
1301 namespaces: object = None
1302
1303[clinic start generated code]*/
1304
1305static PyObject *
1306_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1307 PyObject *namespaces)
1308/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001309{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001310 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001311 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001312 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001313
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001314 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001315 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001316 return _PyObject_CallMethodIdObjArgs(
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001317 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001319 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001320
1321 out = PyList_New(0);
1322 if (!out)
1323 return NULL;
1324
1325 if (!self->extra)
1326 return out;
1327
1328 for (i = 0; i < self->extra->length; i++) {
1329 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001330 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001331 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001332 continue;
1333 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001334 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001335 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1336 Py_DECREF(item);
1337 Py_DECREF(out);
1338 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001339 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001340 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341 }
1342
1343 return out;
1344}
1345
Serhiy Storchakacb985562015-05-04 15:32:48 +03001346/*[clinic input]
1347_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001348
Serhiy Storchakacb985562015-05-04 15:32:48 +03001349 path: object
1350 namespaces: object = None
1351
1352[clinic start generated code]*/
1353
1354static PyObject *
1355_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1356 PyObject *namespaces)
1357/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1358{
1359 PyObject* tag = path;
1360 _Py_IDENTIFIER(iterfind);
1361 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001362
Victor Stinnerf5616342016-12-09 15:26:00 +01001363 return _PyObject_CallMethodIdObjArgs(
1364 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001365}
1366
Serhiy Storchakacb985562015-05-04 15:32:48 +03001367/*[clinic input]
1368_elementtree.Element.get
1369
1370 key: object
1371 default: object = None
1372
1373[clinic start generated code]*/
1374
1375static PyObject *
1376_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1377 PyObject *default_value)
1378/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001379{
1380 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001381
1382 if (!self->extra || self->extra->attrib == Py_None)
1383 value = default_value;
1384 else {
1385 value = PyDict_GetItem(self->extra->attrib, key);
1386 if (!value)
1387 value = default_value;
1388 }
1389
1390 Py_INCREF(value);
1391 return value;
1392}
1393
Serhiy Storchakacb985562015-05-04 15:32:48 +03001394/*[clinic input]
1395_elementtree.Element.getchildren
1396
1397[clinic start generated code]*/
1398
1399static PyObject *
1400_elementtree_Element_getchildren_impl(ElementObject *self)
1401/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001403 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404 PyObject* list;
1405
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001406 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1407 "This method will be removed in future versions. "
1408 "Use 'list(elem)' or iteration over elem instead.",
1409 1) < 0) {
1410 return NULL;
1411 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001413 if (!self->extra)
1414 return PyList_New(0);
1415
1416 list = PyList_New(self->extra->length);
1417 if (!list)
1418 return NULL;
1419
1420 for (i = 0; i < self->extra->length; i++) {
1421 PyObject* item = self->extra->children[i];
1422 Py_INCREF(item);
1423 PyList_SET_ITEM(list, i, item);
1424 }
1425
1426 return list;
1427}
1428
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001429
Eli Bendersky64d11e62012-06-15 07:42:50 +03001430static PyObject *
1431create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1432
1433
Serhiy Storchakacb985562015-05-04 15:32:48 +03001434/*[clinic input]
1435_elementtree.Element.iter
1436
1437 tag: object = None
1438
1439[clinic start generated code]*/
1440
Eli Bendersky64d11e62012-06-15 07:42:50 +03001441static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001442_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1443/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001444{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001445 if (PyUnicode_Check(tag)) {
1446 if (PyUnicode_READY(tag) < 0)
1447 return NULL;
1448 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1449 tag = Py_None;
1450 }
1451 else if (PyBytes_Check(tag)) {
1452 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1453 tag = Py_None;
1454 }
1455
Eli Bendersky64d11e62012-06-15 07:42:50 +03001456 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001457}
1458
1459
Serhiy Storchakacb985562015-05-04 15:32:48 +03001460/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001461_elementtree.Element.getiterator
1462
1463 tag: object = None
1464
1465[clinic start generated code]*/
1466
1467static PyObject *
1468_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1469/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1470{
1471 /* Change for a DeprecationWarning in 1.4 */
1472 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1473 "This method will be removed in future versions. "
1474 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1475 1) < 0) {
1476 return NULL;
1477 }
1478 return _elementtree_Element_iter_impl(self, tag);
1479}
1480
1481
1482/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001483_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001484
Serhiy Storchakacb985562015-05-04 15:32:48 +03001485[clinic start generated code]*/
1486
1487static PyObject *
1488_elementtree_Element_itertext_impl(ElementObject *self)
1489/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1490{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001491 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492}
1493
Eli Bendersky64d11e62012-06-15 07:42:50 +03001494
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001496element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001498 ElementObject* self = (ElementObject*) self_;
1499
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001500 if (!self->extra || index < 0 || index >= self->extra->length) {
1501 PyErr_SetString(
1502 PyExc_IndexError,
1503 "child index out of range"
1504 );
1505 return NULL;
1506 }
1507
1508 Py_INCREF(self->extra->children[index]);
1509 return self->extra->children[index];
1510}
1511
Serhiy Storchakacb985562015-05-04 15:32:48 +03001512/*[clinic input]
1513_elementtree.Element.insert
1514
1515 index: Py_ssize_t
1516 subelement: object(subclass_of='&Element_Type')
1517 /
1518
1519[clinic start generated code]*/
1520
1521static PyObject *
1522_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1523 PyObject *subelement)
1524/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001526 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527
Victor Stinner5f0af232013-07-11 23:01:36 +02001528 if (!self->extra) {
1529 if (create_extra(self, NULL) < 0)
1530 return NULL;
1531 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001532
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001533 if (index < 0) {
1534 index += self->extra->length;
1535 if (index < 0)
1536 index = 0;
1537 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001538 if (index > self->extra->length)
1539 index = self->extra->length;
1540
1541 if (element_resize(self, 1) < 0)
1542 return NULL;
1543
1544 for (i = self->extra->length; i > index; i--)
1545 self->extra->children[i] = self->extra->children[i-1];
1546
Serhiy Storchakacb985562015-05-04 15:32:48 +03001547 Py_INCREF(subelement);
1548 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549
1550 self->extra->length++;
1551
1552 Py_RETURN_NONE;
1553}
1554
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555/*[clinic input]
1556_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557
Serhiy Storchakacb985562015-05-04 15:32:48 +03001558[clinic start generated code]*/
1559
1560static PyObject *
1561_elementtree_Element_items_impl(ElementObject *self)
1562/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1563{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564 if (!self->extra || self->extra->attrib == Py_None)
1565 return PyList_New(0);
1566
1567 return PyDict_Items(self->extra->attrib);
1568}
1569
Serhiy Storchakacb985562015-05-04 15:32:48 +03001570/*[clinic input]
1571_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572
Serhiy Storchakacb985562015-05-04 15:32:48 +03001573[clinic start generated code]*/
1574
1575static PyObject *
1576_elementtree_Element_keys_impl(ElementObject *self)
1577/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1578{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579 if (!self->extra || self->extra->attrib == Py_None)
1580 return PyList_New(0);
1581
1582 return PyDict_Keys(self->extra->attrib);
1583}
1584
Martin v. Löwis18e16552006-02-15 17:27:45 +00001585static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586element_length(ElementObject* self)
1587{
1588 if (!self->extra)
1589 return 0;
1590
1591 return self->extra->length;
1592}
1593
Serhiy Storchakacb985562015-05-04 15:32:48 +03001594/*[clinic input]
1595_elementtree.Element.makeelement
1596
1597 tag: object
1598 attrib: object
1599 /
1600
1601[clinic start generated code]*/
1602
1603static PyObject *
1604_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1605 PyObject *attrib)
1606/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607{
1608 PyObject* elem;
1609
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 attrib = PyDict_Copy(attrib);
1611 if (!attrib)
1612 return NULL;
1613
Eli Bendersky092af1f2012-03-04 07:14:03 +02001614 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615
1616 Py_DECREF(attrib);
1617
1618 return elem;
1619}
1620
Serhiy Storchakacb985562015-05-04 15:32:48 +03001621/*[clinic input]
1622_elementtree.Element.remove
1623
1624 subelement: object(subclass_of='&Element_Type')
1625 /
1626
1627[clinic start generated code]*/
1628
1629static PyObject *
1630_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1631/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001633 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001634 int rc;
1635 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637 if (!self->extra) {
1638 /* element has no children, so raise exception */
1639 PyErr_SetString(
1640 PyExc_ValueError,
1641 "list.remove(x): x not in list"
1642 );
1643 return NULL;
1644 }
1645
1646 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001647 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001648 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001649 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001650 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001651 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001652 if (rc < 0)
1653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001654 }
1655
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001656 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001657 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658 PyErr_SetString(
1659 PyExc_ValueError,
1660 "list.remove(x): x not in list"
1661 );
1662 return NULL;
1663 }
1664
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001665 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666
1667 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668 for (; i < self->extra->length; i++)
1669 self->extra->children[i] = self->extra->children[i+1];
1670
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001671 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672 Py_RETURN_NONE;
1673}
1674
1675static PyObject*
1676element_repr(ElementObject* self)
1677{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001678 int status;
1679
1680 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001681 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001682
1683 status = Py_ReprEnter((PyObject *)self);
1684 if (status == 0) {
1685 PyObject *res;
1686 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1687 Py_ReprLeave((PyObject *)self);
1688 return res;
1689 }
1690 if (status > 0)
1691 PyErr_Format(PyExc_RuntimeError,
1692 "reentrant call inside %s.__repr__",
1693 Py_TYPE(self)->tp_name);
1694 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695}
1696
Serhiy Storchakacb985562015-05-04 15:32:48 +03001697/*[clinic input]
1698_elementtree.Element.set
1699
1700 key: object
1701 value: object
1702 /
1703
1704[clinic start generated code]*/
1705
1706static PyObject *
1707_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1708 PyObject *value)
1709/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001710{
1711 PyObject* attrib;
1712
Victor Stinner5f0af232013-07-11 23:01:36 +02001713 if (!self->extra) {
1714 if (create_extra(self, NULL) < 0)
1715 return NULL;
1716 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001717
1718 attrib = element_get_attrib(self);
1719 if (!attrib)
1720 return NULL;
1721
1722 if (PyDict_SetItem(attrib, key, value) < 0)
1723 return NULL;
1724
1725 Py_RETURN_NONE;
1726}
1727
1728static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001729element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001730{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001731 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001732 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001733 PyObject* old;
1734
1735 if (!self->extra || index < 0 || index >= self->extra->length) {
1736 PyErr_SetString(
1737 PyExc_IndexError,
1738 "child assignment index out of range");
1739 return -1;
1740 }
1741
1742 old = self->extra->children[index];
1743
1744 if (item) {
1745 Py_INCREF(item);
1746 self->extra->children[index] = item;
1747 } else {
1748 self->extra->length--;
1749 for (i = index; i < self->extra->length; i++)
1750 self->extra->children[i] = self->extra->children[i+1];
1751 }
1752
1753 Py_DECREF(old);
1754
1755 return 0;
1756}
1757
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001758static PyObject*
1759element_subscr(PyObject* self_, PyObject* item)
1760{
1761 ElementObject* self = (ElementObject*) self_;
1762
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001763 if (PyIndex_Check(item)) {
1764 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001765
1766 if (i == -1 && PyErr_Occurred()) {
1767 return NULL;
1768 }
1769 if (i < 0 && self->extra)
1770 i += self->extra->length;
1771 return element_getitem(self_, i);
1772 }
1773 else if (PySlice_Check(item)) {
1774 Py_ssize_t start, stop, step, slicelen, cur, i;
1775 PyObject* list;
1776
1777 if (!self->extra)
1778 return PyList_New(0);
1779
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001780 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001781 return NULL;
1782 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001783 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1784 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001785
1786 if (slicelen <= 0)
1787 return PyList_New(0);
1788 else {
1789 list = PyList_New(slicelen);
1790 if (!list)
1791 return NULL;
1792
1793 for (cur = start, i = 0; i < slicelen;
1794 cur += step, i++) {
1795 PyObject* item = self->extra->children[cur];
1796 Py_INCREF(item);
1797 PyList_SET_ITEM(list, i, item);
1798 }
1799
1800 return list;
1801 }
1802 }
1803 else {
1804 PyErr_SetString(PyExc_TypeError,
1805 "element indices must be integers");
1806 return NULL;
1807 }
1808}
1809
1810static int
1811element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1812{
1813 ElementObject* self = (ElementObject*) self_;
1814
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001815 if (PyIndex_Check(item)) {
1816 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001817
1818 if (i == -1 && PyErr_Occurred()) {
1819 return -1;
1820 }
1821 if (i < 0 && self->extra)
1822 i += self->extra->length;
1823 return element_setitem(self_, i, value);
1824 }
1825 else if (PySlice_Check(item)) {
1826 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1827
1828 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001829 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001830
Victor Stinner5f0af232013-07-11 23:01:36 +02001831 if (!self->extra) {
1832 if (create_extra(self, NULL) < 0)
1833 return -1;
1834 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001835
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001836 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001837 return -1;
1838 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001839 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1840 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841
Eli Bendersky865756a2012-03-09 13:38:15 +02001842 if (value == NULL) {
1843 /* Delete slice */
1844 size_t cur;
1845 Py_ssize_t i;
1846
1847 if (slicelen <= 0)
1848 return 0;
1849
1850 /* Since we're deleting, the direction of the range doesn't matter,
1851 * so for simplicity make it always ascending.
1852 */
1853 if (step < 0) {
1854 stop = start + 1;
1855 start = stop + step * (slicelen - 1) - 1;
1856 step = -step;
1857 }
1858
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001859 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001860
1861 /* recycle is a list that will contain all the children
1862 * scheduled for removal.
1863 */
1864 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001865 return -1;
1866 }
1867
1868 /* This loop walks over all the children that have to be deleted,
1869 * with cur pointing at them. num_moved is the amount of children
1870 * until the next deleted child that have to be "shifted down" to
1871 * occupy the deleted's places.
1872 * Note that in the ith iteration, shifting is done i+i places down
1873 * because i children were already removed.
1874 */
1875 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1876 /* Compute how many children have to be moved, clipping at the
1877 * list end.
1878 */
1879 Py_ssize_t num_moved = step - 1;
1880 if (cur + step >= (size_t)self->extra->length) {
1881 num_moved = self->extra->length - cur - 1;
1882 }
1883
1884 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1885
1886 memmove(
1887 self->extra->children + cur - i,
1888 self->extra->children + cur + 1,
1889 num_moved * sizeof(PyObject *));
1890 }
1891
1892 /* Leftover "tail" after the last removed child */
1893 cur = start + (size_t)slicelen * step;
1894 if (cur < (size_t)self->extra->length) {
1895 memmove(
1896 self->extra->children + cur - slicelen,
1897 self->extra->children + cur,
1898 (self->extra->length - cur) * sizeof(PyObject *));
1899 }
1900
1901 self->extra->length -= slicelen;
1902
1903 /* Discard the recycle list with all the deleted sub-elements */
Miss Islington (bot)c46f0422018-10-23 12:45:44 -07001904 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001905 return 0;
1906 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001907
1908 /* A new slice is actually being assigned */
1909 seq = PySequence_Fast(value, "");
1910 if (!seq) {
1911 PyErr_Format(
1912 PyExc_TypeError,
1913 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1914 );
1915 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001916 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001917 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001918
1919 if (step != 1 && newlen != slicelen)
1920 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001921 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001922 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001923 "attempt to assign sequence of size %zd "
1924 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001925 newlen, slicelen
1926 );
1927 return -1;
1928 }
1929
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001930 /* Resize before creating the recycle bin, to prevent refleaks. */
1931 if (newlen > slicelen) {
1932 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001933 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001934 return -1;
1935 }
1936 }
1937
1938 if (slicelen > 0) {
1939 /* to avoid recursive calls to this method (via decref), move
1940 old items to the recycle bin here, and get rid of them when
1941 we're done modifying the element */
1942 recycle = PyList_New(slicelen);
1943 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001944 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001945 return -1;
1946 }
1947 for (cur = start, i = 0; i < slicelen;
1948 cur += step, i++)
1949 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1950 }
1951
1952 if (newlen < slicelen) {
1953 /* delete slice */
1954 for (i = stop; i < self->extra->length; i++)
1955 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1956 } else if (newlen > slicelen) {
1957 /* insert slice */
1958 for (i = self->extra->length-1; i >= stop; i--)
1959 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1960 }
1961
1962 /* replace the slice */
1963 for (cur = start, i = 0; i < newlen;
1964 cur += step, i++) {
1965 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1966 Py_INCREF(element);
1967 self->extra->children[cur] = element;
1968 }
1969
1970 self->extra->length += newlen - slicelen;
1971
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001972 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001973
1974 /* discard the recycle bin, and everything in it */
1975 Py_XDECREF(recycle);
1976
1977 return 0;
1978 }
1979 else {
1980 PyErr_SetString(PyExc_TypeError,
1981 "element indices must be integers");
1982 return -1;
1983 }
1984}
1985
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001986static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001987element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001988{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001989 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001990 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001991 return res;
1992}
1993
Serhiy Storchakadde08152015-11-25 15:28:13 +02001994static PyObject*
1995element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001996{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001997 PyObject *res = element_get_text(self);
1998 Py_XINCREF(res);
1999 return res;
2000}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002001
Serhiy Storchakadde08152015-11-25 15:28:13 +02002002static PyObject*
2003element_tail_getter(ElementObject *self, void *closure)
2004{
2005 PyObject *res = element_get_tail(self);
2006 Py_XINCREF(res);
2007 return res;
2008}
2009
2010static PyObject*
2011element_attrib_getter(ElementObject *self, void *closure)
2012{
2013 PyObject *res;
2014 if (!self->extra) {
2015 if (create_extra(self, NULL) < 0)
2016 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002017 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002018 res = element_get_attrib(self);
2019 Py_XINCREF(res);
2020 return res;
2021}
Victor Stinner4d463432013-07-11 23:05:03 +02002022
/* macro for setter validation: a setter is called with a NULL value when
   the attribute is being deleted, which is refused with AttributeError.
   On rejection the macro returns -1 from the *calling* function.
   Wrapped in do { } while (0) so that it expands to exactly one statement
   and is safe inside unbraced if/else bodies. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)

Serhiy Storchakadde08152015-11-25 15:28:13 +02002032static int
2033element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2034{
2035 _VALIDATE_ATTR_VALUE(value);
2036 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002037 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002038 return 0;
2039}
2040
2041static int
2042element_text_setter(ElementObject *self, PyObject *value, void *closure)
2043{
2044 _VALIDATE_ATTR_VALUE(value);
2045 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002046 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002047 return 0;
2048}
2049
2050static int
2051element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2052{
2053 _VALIDATE_ATTR_VALUE(value);
2054 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002055 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002056 return 0;
2057}
2058
2059static int
2060element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2061{
2062 _VALIDATE_ATTR_VALUE(value);
2063 if (!self->extra) {
2064 if (create_extra(self, NULL) < 0)
2065 return -1;
2066 }
2067 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002068 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002069 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002070}
2071
2072static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002073 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002074 0, /* sq_concat */
2075 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002076 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002077 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002078 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002079 0,
2080};
2081
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082/******************************* Element iterator ****************************/
2083
2084/* ElementIterObject represents the iteration state over an XML element in
2085 * pre-order traversal. To keep track of which sub-element should be returned
2086 * next, a stack of parents is maintained. This is a standard stack-based
2087 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002088 * The stack is managed using a continuous array.
2089 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 * the current one is exhausted, and the next child to examine in that parent.
2091 */
2092typedef struct ParentLocator_t {
2093 ElementObject *parent;
2094 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002095} ParentLocator;
2096
2097typedef struct {
2098 PyObject_HEAD
2099 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002100 Py_ssize_t parent_stack_used;
2101 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102 ElementObject *root_element;
2103 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002104 int gettext;
2105} ElementIterObject;
2106
2107
2108static void
2109elementiter_dealloc(ElementIterObject *it)
2110{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002111 Py_ssize_t i = it->parent_stack_used;
2112 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002113 /* bpo-31095: UnTrack is needed before calling any callbacks */
2114 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002115 while (i--)
2116 Py_XDECREF(it->parent_stack[i].parent);
2117 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118
2119 Py_XDECREF(it->sought_tag);
2120 Py_XDECREF(it->root_element);
2121
Eli Bendersky64d11e62012-06-15 07:42:50 +03002122 PyObject_GC_Del(it);
2123}
2124
2125static int
2126elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2127{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002128 Py_ssize_t i = it->parent_stack_used;
2129 while (i--)
2130 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002131
2132 Py_VISIT(it->root_element);
2133 Py_VISIT(it->sought_tag);
2134 return 0;
2135}
2136
2137/* Helper function for elementiter_next. Add a new parent to the parent stack.
2138 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002139static int
2140parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002141{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002142 ParentLocator *item;
2143
2144 if (it->parent_stack_used >= it->parent_stack_size) {
2145 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2146 ParentLocator *parent_stack = it->parent_stack;
2147 PyMem_Resize(parent_stack, ParentLocator, new_size);
2148 if (parent_stack == NULL)
2149 return -1;
2150 it->parent_stack = parent_stack;
2151 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002153 item = it->parent_stack + it->parent_stack_used++;
2154 Py_INCREF(parent);
2155 item->parent = parent;
2156 item->child_index = 0;
2157 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002158}
2159
2160static PyObject *
2161elementiter_next(ElementIterObject *it)
2162{
2163 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002164 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002165 * A short note on gettext: this function serves both the iter() and
2166 * itertext() methods to avoid code duplication. However, there are a few
2167 * small differences in the way these iterations work. Namely:
2168 * - itertext() only yields text from nodes that have it, and continues
2169 * iterating when a node doesn't have text (so it doesn't return any
2170 * node like iter())
2171 * - itertext() also has to handle tail, after finishing with all the
2172 * children of a node.
2173 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002174 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002175 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002176 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002177
2178 while (1) {
2179 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002180 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002181 * iterator is exhausted.
2182 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002183 if (!it->parent_stack_used) {
2184 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002185 PyErr_SetNone(PyExc_StopIteration);
2186 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002187 }
2188
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002189 elem = it->root_element; /* steals a reference */
2190 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002191 }
2192 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002193 /* See if there are children left to traverse in the current parent. If
2194 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002195 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002196 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2197 Py_ssize_t child_index = item->child_index;
2198 ElementObjectExtra *extra;
2199 elem = item->parent;
2200 extra = elem->extra;
2201 if (!extra || child_index >= extra->length) {
2202 it->parent_stack_used--;
2203 /* Note that extra condition on it->parent_stack_used here;
2204 * this is because itertext() is supposed to only return *inner*
2205 * text, not text following the element it began iteration with.
2206 */
2207 if (it->gettext && it->parent_stack_used) {
2208 text = element_get_tail(elem);
2209 goto gettext;
2210 }
2211 Py_DECREF(elem);
2212 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002213 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002214
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07002215 if (!Element_Check(extra->children[child_index])) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002216 PyErr_Format(PyExc_AttributeError,
2217 "'%.100s' object has no attribute 'iter'",
2218 Py_TYPE(extra->children[child_index])->tp_name);
2219 return NULL;
2220 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002221 elem = (ElementObject *)extra->children[child_index];
2222 item->child_index++;
2223 Py_INCREF(elem);
2224 }
2225
2226 if (parent_stack_push_new(it, elem) < 0) {
2227 Py_DECREF(elem);
2228 PyErr_NoMemory();
2229 return NULL;
2230 }
2231 if (it->gettext) {
2232 text = element_get_text(elem);
2233 goto gettext;
2234 }
2235
2236 if (it->sought_tag == Py_None)
2237 return (PyObject *)elem;
2238
2239 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2240 if (rc > 0)
2241 return (PyObject *)elem;
2242
2243 Py_DECREF(elem);
2244 if (rc < 0)
2245 return NULL;
2246 continue;
2247
2248gettext:
2249 if (!text) {
2250 Py_DECREF(elem);
2251 return NULL;
2252 }
2253 if (text == Py_None) {
2254 Py_DECREF(elem);
2255 }
2256 else {
2257 Py_INCREF(text);
2258 Py_DECREF(elem);
2259 rc = PyObject_IsTrue(text);
2260 if (rc > 0)
2261 return text;
2262 Py_DECREF(text);
2263 if (rc < 0)
2264 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002265 }
2266 }
2267
2268 return NULL;
2269}
2270
2271
2272static PyTypeObject ElementIter_Type = {
2273 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002274 /* Using the module's name since the pure-Python implementation does not
2275 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002276 "_elementtree._element_iterator", /* tp_name */
2277 sizeof(ElementIterObject), /* tp_basicsize */
2278 0, /* tp_itemsize */
2279 /* methods */
2280 (destructor)elementiter_dealloc, /* tp_dealloc */
2281 0, /* tp_print */
2282 0, /* tp_getattr */
2283 0, /* tp_setattr */
2284 0, /* tp_reserved */
2285 0, /* tp_repr */
2286 0, /* tp_as_number */
2287 0, /* tp_as_sequence */
2288 0, /* tp_as_mapping */
2289 0, /* tp_hash */
2290 0, /* tp_call */
2291 0, /* tp_str */
2292 0, /* tp_getattro */
2293 0, /* tp_setattro */
2294 0, /* tp_as_buffer */
2295 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2296 0, /* tp_doc */
2297 (traverseproc)elementiter_traverse, /* tp_traverse */
2298 0, /* tp_clear */
2299 0, /* tp_richcompare */
2300 0, /* tp_weaklistoffset */
2301 PyObject_SelfIter, /* tp_iter */
2302 (iternextfunc)elementiter_next, /* tp_iternext */
2303 0, /* tp_methods */
2304 0, /* tp_members */
2305 0, /* tp_getset */
2306 0, /* tp_base */
2307 0, /* tp_dict */
2308 0, /* tp_descr_get */
2309 0, /* tp_descr_set */
2310 0, /* tp_dictoffset */
2311 0, /* tp_init */
2312 0, /* tp_alloc */
2313 0, /* tp_new */
2314};
2315
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002316#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002317
2318static PyObject *
2319create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2320{
2321 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002322
2323 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2324 if (!it)
2325 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002326
Victor Stinner4d463432013-07-11 23:05:03 +02002327 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002328 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002329 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002330 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002331 it->root_element = self;
2332
Eli Bendersky64d11e62012-06-15 07:42:50 +03002333 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002334
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002335 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002336 if (it->parent_stack == NULL) {
2337 Py_DECREF(it);
2338 PyErr_NoMemory();
2339 return NULL;
2340 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002341 it->parent_stack_used = 0;
2342 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002343
Eli Bendersky64d11e62012-06-15 07:42:50 +03002344 return (PyObject *)it;
2345}
2346
2347
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002348/* ==================================================================== */
2349/* the tree builder type */
2350
2351typedef struct {
2352 PyObject_HEAD
2353
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355
Antoine Pitrouee329312012-10-04 19:53:29 +02002356 PyObject *this; /* current node */
2357 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358
Eli Bendersky58d548d2012-05-29 15:45:16 +03002359 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002360
Eli Bendersky58d548d2012-05-29 15:45:16 +03002361 PyObject *stack; /* element stack */
2362 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363
Eli Bendersky48d358b2012-05-30 17:57:50 +03002364 PyObject *element_factory;
2365
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002367 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002368 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2369 PyObject *end_event_obj;
2370 PyObject *start_ns_event_obj;
2371 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002372} TreeBuilderObject;
2373
Christian Heimes90aa7642007-12-19 02:45:37 +00002374#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002375
2376/* -------------------------------------------------------------------- */
2377/* constructor and destructor */
2378
Eli Bendersky58d548d2012-05-29 15:45:16 +03002379static PyObject *
2380treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002382 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2383 if (t != NULL) {
2384 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385
Eli Bendersky58d548d2012-05-29 15:45:16 +03002386 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002387 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002388 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002389 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390
Eli Bendersky58d548d2012-05-29 15:45:16 +03002391 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002392 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002393 t->stack = PyList_New(20);
2394 if (!t->stack) {
2395 Py_DECREF(t->this);
2396 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002397 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002398 return NULL;
2399 }
2400 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002401
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002402 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002403 t->start_event_obj = t->end_event_obj = NULL;
2404 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2405 }
2406 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002407}
2408
Serhiy Storchakacb985562015-05-04 15:32:48 +03002409/*[clinic input]
2410_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002411
Serhiy Storchakacb985562015-05-04 15:32:48 +03002412 element_factory: object = NULL
2413
2414[clinic start generated code]*/
2415
2416static int
2417_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2418 PyObject *element_factory)
2419/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2420{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002421 if (element_factory) {
2422 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002423 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002424 }
2425
Eli Bendersky58d548d2012-05-29 15:45:16 +03002426 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427}
2428
Eli Bendersky48d358b2012-05-30 17:57:50 +03002429static int
2430treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2431{
Miss Islington (bot)60c919b2018-12-18 13:40:23 -08002432 Py_VISIT(self->end_ns_event_obj);
2433 Py_VISIT(self->start_ns_event_obj);
2434 Py_VISIT(self->end_event_obj);
2435 Py_VISIT(self->start_event_obj);
2436 Py_VISIT(self->events_append);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002437 Py_VISIT(self->root);
2438 Py_VISIT(self->this);
2439 Py_VISIT(self->last);
2440 Py_VISIT(self->data);
2441 Py_VISIT(self->stack);
2442 Py_VISIT(self->element_factory);
2443 return 0;
2444}
2445
2446static int
2447treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002448{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002449 Py_CLEAR(self->end_ns_event_obj);
2450 Py_CLEAR(self->start_ns_event_obj);
2451 Py_CLEAR(self->end_event_obj);
2452 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002453 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002454 Py_CLEAR(self->stack);
2455 Py_CLEAR(self->data);
2456 Py_CLEAR(self->last);
2457 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002458 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002459 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002460 return 0;
2461}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002462
Eli Bendersky48d358b2012-05-30 17:57:50 +03002463static void
2464treebuilder_dealloc(TreeBuilderObject *self)
2465{
2466 PyObject_GC_UnTrack(self);
2467 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002468 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002469}
2470
2471/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002472/* helpers for handling of arbitrary element-like objects */
2473
2474static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002475treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002476 PyObject **dest, _Py_Identifier *name)
2477{
2478 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002479 PyObject *tmp = JOIN_OBJ(*dest);
2480 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2481 *data = NULL;
2482 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002483 return 0;
2484 }
2485 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002486 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002487 int r;
2488 if (joined == NULL)
2489 return -1;
2490 r = _PyObject_SetAttrId(element, name, joined);
2491 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002492 if (r < 0)
2493 return -1;
2494 Py_CLEAR(*data);
2495 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002496 }
2497}
2498
Serhiy Storchaka576def02017-03-30 09:47:31 +03002499LOCAL(int)
2500treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002501{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002502 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002503
Serhiy Storchaka576def02017-03-30 09:47:31 +03002504 if (!self->data) {
2505 return 0;
2506 }
2507
2508 if (self->this == element) {
2509 _Py_IDENTIFIER(text);
2510 return treebuilder_set_element_text_or_tail(
2511 element, &self->data,
2512 &((ElementObject *) element)->text, &PyId_text);
2513 }
2514 else {
2515 _Py_IDENTIFIER(tail);
2516 return treebuilder_set_element_text_or_tail(
2517 element, &self->data,
2518 &((ElementObject *) element)->tail, &PyId_tail);
2519 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002520}
2521
2522static int
2523treebuilder_add_subelement(PyObject *element, PyObject *child)
2524{
2525 _Py_IDENTIFIER(append);
2526 if (Element_CheckExact(element)) {
2527 ElementObject *elem = (ElementObject *) element;
2528 return element_add_subelement(elem, child);
2529 }
2530 else {
2531 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002532 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002533 if (res == NULL)
2534 return -1;
2535 Py_DECREF(res);
2536 return 0;
2537 }
2538}
2539
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002540LOCAL(int)
2541treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2542 PyObject *node)
2543{
2544 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002545 PyObject *res;
2546 PyObject *event = PyTuple_Pack(2, action, node);
2547 if (event == NULL)
2548 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002549 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002550 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002551 if (res == NULL)
2552 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002553 Py_DECREF(res);
2554 }
2555 return 0;
2556}
2557
Antoine Pitrouee329312012-10-04 19:53:29 +02002558/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559/* handlers */
2560
2561LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2563 PyObject* attrib)
2564{
2565 PyObject* node;
2566 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002567 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568
Serhiy Storchaka576def02017-03-30 09:47:31 +03002569 if (treebuilder_flush_data(self) < 0) {
2570 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 }
2572
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002573 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002574 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002575 } else if (attrib == Py_None) {
2576 attrib = PyDict_New();
2577 if (!attrib)
2578 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002579 node = PyObject_CallFunctionObjArgs(self->element_factory,
2580 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002581 Py_DECREF(attrib);
2582 }
2583 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002584 node = PyObject_CallFunctionObjArgs(self->element_factory,
2585 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002586 }
2587 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590
Antoine Pitrouee329312012-10-04 19:53:29 +02002591 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002592
2593 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002594 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002595 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 } else {
2597 if (self->root) {
2598 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002599 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600 "multiple elements on top level"
2601 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002602 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603 }
2604 Py_INCREF(node);
2605 self->root = node;
2606 }
2607
2608 if (self->index < PyList_GET_SIZE(self->stack)) {
2609 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002610 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 Py_INCREF(this);
2612 } else {
2613 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002614 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615 }
2616 self->index++;
2617
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002619 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002621 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002623 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2624 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625
2626 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002627
2628 error:
2629 Py_DECREF(node);
2630 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631}
2632
2633LOCAL(PyObject*)
2634treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2635{
2636 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002637 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002638 /* ignore calls to data before the first call to start */
2639 Py_RETURN_NONE;
2640 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641 /* store the first item as is */
2642 Py_INCREF(data); self->data = data;
2643 } else {
2644 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002645 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2646 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002647 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648 /* expat often generates single character data sections; handle
2649 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002650 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2651 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002653 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654 } else if (PyList_CheckExact(self->data)) {
2655 if (PyList_Append(self->data, data) < 0)
2656 return NULL;
2657 } else {
2658 PyObject* list = PyList_New(2);
2659 if (!list)
2660 return NULL;
2661 PyList_SET_ITEM(list, 0, self->data);
2662 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2663 self->data = list;
2664 }
2665 }
2666
2667 Py_RETURN_NONE;
2668}
2669
2670LOCAL(PyObject*)
2671treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2672{
2673 PyObject* item;
2674
Serhiy Storchaka576def02017-03-30 09:47:31 +03002675 if (treebuilder_flush_data(self) < 0) {
2676 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002677 }
2678
2679 if (self->index == 0) {
2680 PyErr_SetString(
2681 PyExc_IndexError,
2682 "pop from empty stack"
2683 );
2684 return NULL;
2685 }
2686
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002687 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002688 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002689 self->index--;
2690 self->this = PyList_GET_ITEM(self->stack, self->index);
2691 Py_INCREF(self->this);
2692 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002694 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2695 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696
2697 Py_INCREF(self->last);
2698 return (PyObject*) self->last;
2699}
2700
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701/* -------------------------------------------------------------------- */
2702/* methods (in alphabetical order) */
2703
Serhiy Storchakacb985562015-05-04 15:32:48 +03002704/*[clinic input]
2705_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706
Serhiy Storchakacb985562015-05-04 15:32:48 +03002707 data: object
2708 /
2709
2710[clinic start generated code]*/
2711
2712static PyObject *
2713_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2714/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2715{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716 return treebuilder_handle_data(self, data);
2717}
2718
Serhiy Storchakacb985562015-05-04 15:32:48 +03002719/*[clinic input]
2720_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721
Serhiy Storchakacb985562015-05-04 15:32:48 +03002722 tag: object
2723 /
2724
2725[clinic start generated code]*/
2726
2727static PyObject *
2728_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2729/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2730{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731 return treebuilder_handle_end(self, tag);
2732}
2733
2734LOCAL(PyObject*)
2735treebuilder_done(TreeBuilderObject* self)
2736{
2737 PyObject* res;
2738
2739 /* FIXME: check stack size? */
2740
2741 if (self->root)
2742 res = self->root;
2743 else
2744 res = Py_None;
2745
2746 Py_INCREF(res);
2747 return res;
2748}
2749
Serhiy Storchakacb985562015-05-04 15:32:48 +03002750/*[clinic input]
2751_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752
Serhiy Storchakacb985562015-05-04 15:32:48 +03002753[clinic start generated code]*/
2754
2755static PyObject *
2756_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2757/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2758{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759 return treebuilder_done(self);
2760}
2761
Serhiy Storchakacb985562015-05-04 15:32:48 +03002762/*[clinic input]
2763_elementtree.TreeBuilder.start
2764
2765 tag: object
2766 attrs: object = None
2767 /
2768
2769[clinic start generated code]*/
2770
2771static PyObject *
2772_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2773 PyObject *attrs)
2774/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002776 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777}
2778
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779/* ==================================================================== */
2780/* the expat interface */
2781
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002784
2785/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2786 * cached globally without being in per-module state.
2787 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002788static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002790
Eli Bendersky52467b12012-06-01 07:13:08 +03002791static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2792 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794typedef struct {
2795 PyObject_HEAD
2796
2797 XML_Parser parser;
2798
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002799 PyObject *target;
2800 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002802 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002804 PyObject *handle_start;
2805 PyObject *handle_data;
2806 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002808 PyObject *handle_comment;
2809 PyObject *handle_pi;
2810 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002812 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814} XMLParserObject;
2815
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002816static PyObject*
Serhiy Storchakaa5552f02017-12-15 13:11:11 +02002817_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002818static PyObject *
2819_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2820 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002821
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822/* helpers */
2823
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824LOCAL(PyObject*)
2825makeuniversal(XMLParserObject* self, const char* string)
2826{
2827 /* convert a UTF-8 tag/attribute name from the expat parser
2828 to a universal name string */
2829
Antoine Pitrouc1948842012-10-01 23:40:37 +02002830 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002831 PyObject* key;
2832 PyObject* value;
2833
2834 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002835 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002836 if (!key)
2837 return NULL;
2838
2839 value = PyDict_GetItem(self->names, key);
2840
2841 if (value) {
2842 Py_INCREF(value);
2843 } else {
2844 /* new name. convert to universal name, and decode as
2845 necessary */
2846
2847 PyObject* tag;
2848 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002849 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002850
2851 /* look for namespace separator */
2852 for (i = 0; i < size; i++)
2853 if (string[i] == '}')
2854 break;
2855 if (i != size) {
2856 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002857 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002858 if (tag == NULL) {
2859 Py_DECREF(key);
2860 return NULL;
2861 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002862 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002863 p[0] = '{';
2864 memcpy(p+1, string, size);
2865 size++;
2866 } else {
2867 /* plain name; use key as tag */
2868 Py_INCREF(key);
2869 tag = key;
2870 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002871
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002872 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002873 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002874 value = PyUnicode_DecodeUTF8(p, size, "strict");
2875 Py_DECREF(tag);
2876 if (!value) {
2877 Py_DECREF(key);
2878 return NULL;
2879 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002880
2881 /* add to names dictionary */
2882 if (PyDict_SetItem(self->names, key, value) < 0) {
2883 Py_DECREF(key);
2884 Py_DECREF(value);
2885 return NULL;
2886 }
2887 }
2888
2889 Py_DECREF(key);
2890 return value;
2891}
2892
Eli Bendersky5b77d812012-03-16 08:20:05 +02002893/* Set the ParseError exception with the given parameters.
2894 * If message is not NULL, it's used as the error string. Otherwise, the
2895 * message string is the default for the given error_code.
2896*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002897static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002898expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2899 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002900{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002901 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002902 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002904 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002905 message ? message : EXPAT(ErrorString)(error_code),
2906 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002907 if (errmsg == NULL)
2908 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002909
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002910 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002911 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002912 if (!error)
2913 return;
2914
Eli Bendersky5b77d812012-03-16 08:20:05 +02002915 /* Add code and position attributes */
2916 code = PyLong_FromLong((long)error_code);
2917 if (!code) {
2918 Py_DECREF(error);
2919 return;
2920 }
2921 if (PyObject_SetAttrString(error, "code", code) == -1) {
2922 Py_DECREF(error);
2923 Py_DECREF(code);
2924 return;
2925 }
2926 Py_DECREF(code);
2927
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002928 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002929 if (!position) {
2930 Py_DECREF(error);
2931 return;
2932 }
2933 if (PyObject_SetAttrString(error, "position", position) == -1) {
2934 Py_DECREF(error);
2935 Py_DECREF(position);
2936 return;
2937 }
2938 Py_DECREF(position);
2939
Eli Bendersky532d03e2013-08-10 08:00:39 -07002940 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002941 Py_DECREF(error);
2942}
2943
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944/* -------------------------------------------------------------------- */
2945/* handlers */
2946
2947static void
2948expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2949 int data_len)
2950{
2951 PyObject* key;
2952 PyObject* value;
2953 PyObject* res;
2954
2955 if (data_len < 2 || data_in[0] != '&')
2956 return;
2957
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002958 if (PyErr_Occurred())
2959 return;
2960
Neal Norwitz0269b912007-08-08 06:56:02 +00002961 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 if (!key)
2963 return;
2964
2965 value = PyDict_GetItem(self->entity, key);
2966
2967 if (value) {
2968 if (TreeBuilder_CheckExact(self->target))
2969 res = treebuilder_handle_data(
2970 (TreeBuilderObject*) self->target, value
2971 );
2972 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002973 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 else
2975 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002977 } else if (!PyErr_Occurred()) {
2978 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002979 char message[128] = "undefined entity ";
2980 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002981 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002982 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002984 EXPAT(GetErrorColumnNumber)(self->parser),
2985 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 );
2987 }
2988
2989 Py_DECREF(key);
2990}
2991
2992static void
2993expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2994 const XML_Char **attrib_in)
2995{
2996 PyObject* res;
2997 PyObject* tag;
2998 PyObject* attrib;
2999 int ok;
3000
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003001 if (PyErr_Occurred())
3002 return;
3003
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 /* tag name */
3005 tag = makeuniversal(self, tag_in);
3006 if (!tag)
3007 return; /* parser will look for errors */
3008
3009 /* attributes */
3010 if (attrib_in[0]) {
3011 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003012 if (!attrib) {
3013 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003015 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 while (attrib_in[0] && attrib_in[1]) {
3017 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003018 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003019 if (!key || !value) {
3020 Py_XDECREF(value);
3021 Py_XDECREF(key);
3022 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003023 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003024 return;
3025 }
3026 ok = PyDict_SetItem(attrib, key, value);
3027 Py_DECREF(value);
3028 Py_DECREF(key);
3029 if (ok < 0) {
3030 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003031 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003032 return;
3033 }
3034 attrib_in += 2;
3035 }
3036 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003037 Py_INCREF(Py_None);
3038 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003039 }
3040
3041 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042 /* shortcut */
3043 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3044 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003045 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003046 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003047 if (attrib == Py_None) {
3048 Py_DECREF(attrib);
3049 attrib = PyDict_New();
3050 if (!attrib) {
3051 Py_DECREF(tag);
3052 return;
3053 }
3054 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003055 res = PyObject_CallFunctionObjArgs(self->handle_start,
3056 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003057 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058 res = NULL;
3059
3060 Py_DECREF(tag);
3061 Py_DECREF(attrib);
3062
3063 Py_XDECREF(res);
3064}
3065
3066static void
3067expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3068 int data_len)
3069{
3070 PyObject* data;
3071 PyObject* res;
3072
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003073 if (PyErr_Occurred())
3074 return;
3075
Neal Norwitz0269b912007-08-08 06:56:02 +00003076 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003077 if (!data)
3078 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079
3080 if (TreeBuilder_CheckExact(self->target))
3081 /* shortcut */
3082 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3083 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003084 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085 else
3086 res = NULL;
3087
3088 Py_DECREF(data);
3089
3090 Py_XDECREF(res);
3091}
3092
3093static void
3094expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3095{
3096 PyObject* tag;
3097 PyObject* res = NULL;
3098
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003099 if (PyErr_Occurred())
3100 return;
3101
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102 if (TreeBuilder_CheckExact(self->target))
3103 /* shortcut */
3104 /* the standard tree builder doesn't look at the end tag */
3105 res = treebuilder_handle_end(
3106 (TreeBuilderObject*) self->target, Py_None
3107 );
3108 else if (self->handle_end) {
3109 tag = makeuniversal(self, tag_in);
3110 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003111 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003112 Py_DECREF(tag);
3113 }
3114 }
3115
3116 Py_XDECREF(res);
3117}
3118
3119static void
3120expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3121 const XML_Char *uri)
3122{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003123 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3124 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003125
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003126 if (PyErr_Occurred())
3127 return;
3128
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003129 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003130 return;
3131
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003132 if (!uri)
3133 uri = "";
3134 if (!prefix)
3135 prefix = "";
3136
3137 parcel = Py_BuildValue("ss", prefix, uri);
3138 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003139 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003140 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3141 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003142}
3143
3144static void
3145expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3146{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003147 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3148
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003149 if (PyErr_Occurred())
3150 return;
3151
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003152 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003153 return;
3154
3155 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003156}
3157
3158static void
3159expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3160{
3161 PyObject* comment;
3162 PyObject* res;
3163
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003164 if (PyErr_Occurred())
3165 return;
3166
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003167 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003168 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003169 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003170 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3171 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003172 Py_XDECREF(res);
3173 Py_DECREF(comment);
3174 }
3175 }
3176}
3177
Eli Bendersky45839902013-01-13 05:14:47 -08003178static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003179expat_start_doctype_handler(XMLParserObject *self,
3180 const XML_Char *doctype_name,
3181 const XML_Char *sysid,
3182 const XML_Char *pubid,
3183 int has_internal_subset)
3184{
3185 PyObject *self_pyobj = (PyObject *)self;
3186 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3187 PyObject *parser_doctype = NULL;
3188 PyObject *res = NULL;
3189
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003190 if (PyErr_Occurred())
3191 return;
3192
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003193 doctype_name_obj = makeuniversal(self, doctype_name);
3194 if (!doctype_name_obj)
3195 return;
3196
3197 if (sysid) {
3198 sysid_obj = makeuniversal(self, sysid);
3199 if (!sysid_obj) {
3200 Py_DECREF(doctype_name_obj);
3201 return;
3202 }
3203 } else {
3204 Py_INCREF(Py_None);
3205 sysid_obj = Py_None;
3206 }
3207
3208 if (pubid) {
3209 pubid_obj = makeuniversal(self, pubid);
3210 if (!pubid_obj) {
3211 Py_DECREF(doctype_name_obj);
3212 Py_DECREF(sysid_obj);
3213 return;
3214 }
3215 } else {
3216 Py_INCREF(Py_None);
3217 pubid_obj = Py_None;
3218 }
3219
3220 /* If the target has a handler for doctype, call it. */
3221 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003222 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3223 doctype_name_obj, pubid_obj,
3224 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003225 Py_CLEAR(res);
3226 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003227 else {
3228 /* Now see if the parser itself has a doctype method. If yes and it's
3229 * a custom method, call it but warn about deprecation. If it's only
3230 * the vanilla XMLParser method, do nothing.
3231 */
3232 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3233 if (parser_doctype &&
3234 !(PyCFunction_Check(parser_doctype) &&
3235 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3236 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003237 (PyCFunction) _elementtree_XMLParser_doctype)) {
3238 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3239 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003240 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003241 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003242 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003243 res = PyObject_CallFunctionObjArgs(parser_doctype,
3244 doctype_name_obj, pubid_obj,
3245 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003246 Py_CLEAR(res);
3247 }
3248 }
3249
3250clear:
3251 Py_XDECREF(parser_doctype);
3252 Py_DECREF(doctype_name_obj);
3253 Py_DECREF(pubid_obj);
3254 Py_DECREF(sysid_obj);
3255}
3256
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257static void
3258expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3259 const XML_Char* data_in)
3260{
3261 PyObject* target;
3262 PyObject* data;
3263 PyObject* res;
3264
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003265 if (PyErr_Occurred())
3266 return;
3267
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003269 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3270 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003272 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3273 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 Py_XDECREF(res);
3275 Py_DECREF(data);
3276 Py_DECREF(target);
3277 } else {
3278 Py_XDECREF(data);
3279 Py_XDECREF(target);
3280 }
3281 }
3282}
3283
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285
Eli Bendersky52467b12012-06-01 07:13:08 +03003286static PyObject *
3287xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288{
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3290 if (self) {
3291 self->parser = NULL;
3292 self->target = self->entity = self->names = NULL;
3293 self->handle_start = self->handle_data = self->handle_end = NULL;
3294 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003295 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003297 return (PyObject *)self;
3298}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299
scoderc8d8e152017-09-14 22:00:03 +02003300static int
3301ignore_attribute_error(PyObject *value)
3302{
3303 if (value == NULL) {
3304 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3305 return -1;
3306 }
3307 PyErr_Clear();
3308 }
3309 return 0;
3310}
3311
Serhiy Storchakacb985562015-05-04 15:32:48 +03003312/*[clinic input]
3313_elementtree.XMLParser.__init__
3314
3315 html: object = NULL
3316 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003317 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318
3319[clinic start generated code]*/
3320
Eli Bendersky52467b12012-06-01 07:13:08 +03003321static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003322_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3323 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003324/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003325{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003326 if (html != NULL) {
3327 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3328 "The html argument of XMLParser() is deprecated",
3329 1) < 0) {
3330 return -1;
3331 }
3332 }
3333
Serhiy Storchakacb985562015-05-04 15:32:48 +03003334 self->entity = PyDict_New();
3335 if (!self->entity)
3336 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337
Serhiy Storchakacb985562015-05-04 15:32:48 +03003338 self->names = PyDict_New();
3339 if (!self->names) {
3340 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003341 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003343
Serhiy Storchakacb985562015-05-04 15:32:48 +03003344 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3345 if (!self->parser) {
3346 Py_CLEAR(self->entity);
3347 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003349 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350 }
Miss Islington (bot)470a4352018-09-18 06:11:09 -07003351 /* expat < 2.1.0 has no XML_SetHashSalt() */
3352 if (EXPAT(SetHashSalt) != NULL) {
3353 EXPAT(SetHashSalt)(self->parser,
3354 (unsigned long)_Py_HashSecret.expat.hashsalt);
3355 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003356
Eli Bendersky52467b12012-06-01 07:13:08 +03003357 if (target) {
3358 Py_INCREF(target);
3359 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003360 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003361 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003362 Py_CLEAR(self->entity);
3363 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003364 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003365 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003366 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003367 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003370 if (ignore_attribute_error(self->handle_start)) {
3371 return -1;
3372 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003373 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003374 if (ignore_attribute_error(self->handle_data)) {
3375 return -1;
3376 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003378 if (ignore_attribute_error(self->handle_end)) {
3379 return -1;
3380 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003381 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003382 if (ignore_attribute_error(self->handle_comment)) {
3383 return -1;
3384 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003385 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003386 if (ignore_attribute_error(self->handle_pi)) {
3387 return -1;
3388 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003389 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003390 if (ignore_attribute_error(self->handle_close)) {
3391 return -1;
3392 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003393 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003394 if (ignore_attribute_error(self->handle_doctype)) {
3395 return -1;
3396 }
Eli Bendersky45839902013-01-13 05:14:47 -08003397
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003399 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003401 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 (XML_StartElementHandler) expat_start_handler,
3403 (XML_EndElementHandler) expat_end_handler
3404 );
3405 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003406 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407 (XML_DefaultHandler) expat_default_handler
3408 );
3409 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003410 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 (XML_CharacterDataHandler) expat_data_handler
3412 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003413 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003414 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003415 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416 (XML_CommentHandler) expat_comment_handler
3417 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003418 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003420 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421 (XML_ProcessingInstructionHandler) expat_pi_handler
3422 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003423 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003424 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003425 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3426 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003428 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003429 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003430 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431
Eli Bendersky52467b12012-06-01 07:13:08 +03003432 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433}
3434
Eli Bendersky52467b12012-06-01 07:13:08 +03003435static int
3436xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3437{
3438 Py_VISIT(self->handle_close);
3439 Py_VISIT(self->handle_pi);
3440 Py_VISIT(self->handle_comment);
3441 Py_VISIT(self->handle_end);
3442 Py_VISIT(self->handle_data);
3443 Py_VISIT(self->handle_start);
3444
3445 Py_VISIT(self->target);
3446 Py_VISIT(self->entity);
3447 Py_VISIT(self->names);
3448
3449 return 0;
3450}
3451
3452static int
3453xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454{
Victor Stinnere727d412017-09-18 05:29:37 -07003455 if (self->parser != NULL) {
3456 XML_Parser parser = self->parser;
3457 self->parser = NULL;
3458 EXPAT(ParserFree)(parser);
3459 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460
Antoine Pitrouc1948842012-10-01 23:40:37 +02003461 Py_CLEAR(self->handle_close);
3462 Py_CLEAR(self->handle_pi);
3463 Py_CLEAR(self->handle_comment);
3464 Py_CLEAR(self->handle_end);
3465 Py_CLEAR(self->handle_data);
3466 Py_CLEAR(self->handle_start);
3467 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468
Antoine Pitrouc1948842012-10-01 23:40:37 +02003469 Py_CLEAR(self->target);
3470 Py_CLEAR(self->entity);
3471 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003472
Eli Bendersky52467b12012-06-01 07:13:08 +03003473 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474}
3475
Eli Bendersky52467b12012-06-01 07:13:08 +03003476static void
3477xmlparser_dealloc(XMLParserObject* self)
3478{
3479 PyObject_GC_UnTrack(self);
3480 xmlparser_gc_clear(self);
3481 Py_TYPE(self)->tp_free((PyObject *)self);
3482}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003483
3484LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003485expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003486{
3487 int ok;
3488
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003489 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3491
3492 if (PyErr_Occurred())
3493 return NULL;
3494
3495 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003496 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003497 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003499 EXPAT(GetErrorColumnNumber)(self->parser),
3500 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501 );
3502 return NULL;
3503 }
3504
3505 Py_RETURN_NONE;
3506}
3507
Serhiy Storchakacb985562015-05-04 15:32:48 +03003508/*[clinic input]
3509_elementtree.XMLParser.close
3510
3511[clinic start generated code]*/
3512
3513static PyObject *
3514_elementtree_XMLParser_close_impl(XMLParserObject *self)
3515/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003516{
3517 /* end feeding data to parser */
3518
3519 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003520 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003521 if (!res)
3522 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003524 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003525 Py_DECREF(res);
3526 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003527 }
3528 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003529 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003530 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003531 }
3532 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003533 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003534 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003535}
3536
Serhiy Storchakacb985562015-05-04 15:32:48 +03003537/*[clinic input]
3538_elementtree.XMLParser.feed
3539
3540 data: object
3541 /
3542
3543[clinic start generated code]*/
3544
3545static PyObject *
3546_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3547/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003548{
3549 /* feed data to parser */
3550
Serhiy Storchakacb985562015-05-04 15:32:48 +03003551 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003552 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003553 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3554 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003555 return NULL;
3556 if (data_len > INT_MAX) {
3557 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3558 return NULL;
3559 }
3560 /* Explicitly set UTF-8 encoding. Return code ignored. */
3561 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003562 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003563 }
3564 else {
3565 Py_buffer view;
3566 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003567 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003568 return NULL;
3569 if (view.len > INT_MAX) {
3570 PyBuffer_Release(&view);
3571 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3572 return NULL;
3573 }
3574 res = expat_parse(self, view.buf, (int)view.len, 0);
3575 PyBuffer_Release(&view);
3576 return res;
3577 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578}
3579
Serhiy Storchakacb985562015-05-04 15:32:48 +03003580/*[clinic input]
3581_elementtree.XMLParser._parse_whole
3582
3583 file: object
3584 /
3585
3586[clinic start generated code]*/
3587
3588static PyObject *
3589_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3590/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591{
Eli Benderskya3699232013-05-19 18:47:23 -07003592 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593 PyObject* reader;
3594 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003595 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003596 PyObject* res;
3597
Serhiy Storchakacb985562015-05-04 15:32:48 +03003598 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599 if (!reader)
3600 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003601
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003602 /* read from open file object */
3603 for (;;) {
3604
3605 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3606
3607 if (!buffer) {
3608 /* read failed (e.g. due to KeyboardInterrupt) */
3609 Py_DECREF(reader);
3610 return NULL;
3611 }
3612
Eli Benderskyf996e772012-03-16 05:53:30 +02003613 if (PyUnicode_CheckExact(buffer)) {
3614 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003615 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003616 Py_DECREF(buffer);
3617 break;
3618 }
3619 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003620 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003621 if (!temp) {
3622 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003623 Py_DECREF(reader);
3624 return NULL;
3625 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003626 buffer = temp;
3627 }
3628 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629 Py_DECREF(buffer);
3630 break;
3631 }
3632
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003633 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3634 Py_DECREF(buffer);
3635 Py_DECREF(reader);
3636 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3637 return NULL;
3638 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003639 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003640 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641 );
3642
3643 Py_DECREF(buffer);
3644
3645 if (!res) {
3646 Py_DECREF(reader);
3647 return NULL;
3648 }
3649 Py_DECREF(res);
3650
3651 }
3652
3653 Py_DECREF(reader);
3654
3655 res = expat_parse(self, "", 0, 1);
3656
3657 if (res && TreeBuilder_CheckExact(self->target)) {
3658 Py_DECREF(res);
3659 return treebuilder_done((TreeBuilderObject*) self->target);
3660 }
3661
3662 return res;
3663}
3664
Serhiy Storchakacb985562015-05-04 15:32:48 +03003665/*[clinic input]
3666_elementtree.XMLParser.doctype
3667
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003668 name: object
3669 pubid: object
3670 system: object
3671 /
3672
Serhiy Storchakacb985562015-05-04 15:32:48 +03003673[clinic start generated code]*/
3674
3675static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003676_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3677 PyObject *pubid, PyObject *system)
3678/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003679{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003680 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3681 "This method of XMLParser is deprecated. Define"
3682 " doctype() method on the TreeBuilder target.",
3683 1) < 0) {
3684 return NULL;
3685 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003686 Py_RETURN_NONE;
3687}
3688
Serhiy Storchakacb985562015-05-04 15:32:48 +03003689/*[clinic input]
3690_elementtree.XMLParser._setevents
3691
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003692 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003693 events_to_report: object = None
3694 /
3695
3696[clinic start generated code]*/
3697
3698static PyObject *
3699_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3700 PyObject *events_queue,
3701 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003702/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003703{
3704 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003705 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003706 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003707 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003708
3709 if (!TreeBuilder_CheckExact(self->target)) {
3710 PyErr_SetString(
3711 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003712 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003713 "targets"
3714 );
3715 return NULL;
3716 }
3717
3718 target = (TreeBuilderObject*) self->target;
3719
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003720 events_append = PyObject_GetAttrString(events_queue, "append");
3721 if (events_append == NULL)
3722 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003723 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724
3725 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003726 Py_CLEAR(target->start_event_obj);
3727 Py_CLEAR(target->end_event_obj);
3728 Py_CLEAR(target->start_ns_event_obj);
3729 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003730
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003731 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003732 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003733 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003734 Py_RETURN_NONE;
3735 }
3736
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003737 if (!(events_seq = PySequence_Fast(events_to_report,
3738 "events must be a sequence"))) {
3739 return NULL;
3740 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003741
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003742 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003743 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003744 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003745 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003746 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003747 } else if (PyBytes_Check(event_name_obj)) {
3748 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003749 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003750 if (event_name == NULL) {
3751 Py_DECREF(events_seq);
3752 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3753 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003754 }
3755
3756 Py_INCREF(event_name_obj);
3757 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003758 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003759 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003760 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003761 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003762 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003763 EXPAT(SetNamespaceDeclHandler)(
3764 self->parser,
3765 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3766 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3767 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003768 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003769 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003770 EXPAT(SetNamespaceDeclHandler)(
3771 self->parser,
3772 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3773 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3774 );
3775 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003776 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003777 Py_DECREF(events_seq);
3778 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003779 return NULL;
3780 }
3781 }
3782
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003783 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003784 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003785}
3786
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003787static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003788xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003789{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003790 if (PyUnicode_Check(nameobj)) {
3791 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003792 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003793 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003794 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003795 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003796 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003797 return PyUnicode_FromFormat(
3798 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003799 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003800 }
3801 else
3802 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003803
Alexander Belopolskye239d232010-12-08 23:31:48 +00003804 Py_INCREF(res);
3805 return res;
3806 }
3807 generic:
3808 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003809}
3810
Serhiy Storchakacb985562015-05-04 15:32:48 +03003811#include "clinic/_elementtree.c.h"
3812
3813static PyMethodDef element_methods[] = {
3814
3815 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3816
3817 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3818 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3819
3820 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3821 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3822 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3823
3824 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3825 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3826 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3827 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3828
3829 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3830 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3831 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3832
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003833 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003834 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3835
3836 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3837 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3838
3839 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3840
3841 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3842 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3843 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3844 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3845 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3846
3847 {NULL, NULL}
3848};
3849
3850static PyMappingMethods element_as_mapping = {
3851 (lenfunc) element_length,
3852 (binaryfunc) element_subscr,
3853 (objobjargproc) element_ass_subscr,
3854};
3855
Serhiy Storchakadde08152015-11-25 15:28:13 +02003856static PyGetSetDef element_getsetlist[] = {
3857 {"tag",
3858 (getter)element_tag_getter,
3859 (setter)element_tag_setter,
3860 "A string identifying what kind of data this element represents"},
3861 {"text",
3862 (getter)element_text_getter,
3863 (setter)element_text_setter,
3864 "A string of text directly after the start tag, or None"},
3865 {"tail",
3866 (getter)element_tail_getter,
3867 (setter)element_tail_setter,
3868 "A string of text directly after the end tag, or None"},
3869 {"attrib",
3870 (getter)element_attrib_getter,
3871 (setter)element_attrib_setter,
3872 "A dictionary containing the element's attributes"},
3873 {NULL},
3874};
3875
Serhiy Storchakacb985562015-05-04 15:32:48 +03003876static PyTypeObject Element_Type = {
3877 PyVarObject_HEAD_INIT(NULL, 0)
3878 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3879 /* methods */
3880 (destructor)element_dealloc, /* tp_dealloc */
3881 0, /* tp_print */
3882 0, /* tp_getattr */
3883 0, /* tp_setattr */
3884 0, /* tp_reserved */
3885 (reprfunc)element_repr, /* tp_repr */
3886 0, /* tp_as_number */
3887 &element_as_sequence, /* tp_as_sequence */
3888 &element_as_mapping, /* tp_as_mapping */
3889 0, /* tp_hash */
3890 0, /* tp_call */
3891 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003892 PyObject_GenericGetAttr, /* tp_getattro */
3893 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003894 0, /* tp_as_buffer */
3895 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3896 /* tp_flags */
3897 0, /* tp_doc */
3898 (traverseproc)element_gc_traverse, /* tp_traverse */
3899 (inquiry)element_gc_clear, /* tp_clear */
3900 0, /* tp_richcompare */
3901 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3902 0, /* tp_iter */
3903 0, /* tp_iternext */
3904 element_methods, /* tp_methods */
3905 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003906 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003907 0, /* tp_base */
3908 0, /* tp_dict */
3909 0, /* tp_descr_get */
3910 0, /* tp_descr_set */
3911 0, /* tp_dictoffset */
3912 (initproc)element_init, /* tp_init */
3913 PyType_GenericAlloc, /* tp_alloc */
3914 element_new, /* tp_new */
3915 0, /* tp_free */
3916};
3917
3918static PyMethodDef treebuilder_methods[] = {
3919 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3920 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3921 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3922 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3923 {NULL, NULL}
3924};
3925
3926static PyTypeObject TreeBuilder_Type = {
3927 PyVarObject_HEAD_INIT(NULL, 0)
3928 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3929 /* methods */
3930 (destructor)treebuilder_dealloc, /* tp_dealloc */
3931 0, /* tp_print */
3932 0, /* tp_getattr */
3933 0, /* tp_setattr */
3934 0, /* tp_reserved */
3935 0, /* tp_repr */
3936 0, /* tp_as_number */
3937 0, /* tp_as_sequence */
3938 0, /* tp_as_mapping */
3939 0, /* tp_hash */
3940 0, /* tp_call */
3941 0, /* tp_str */
3942 0, /* tp_getattro */
3943 0, /* tp_setattro */
3944 0, /* tp_as_buffer */
3945 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3946 /* tp_flags */
3947 0, /* tp_doc */
3948 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3949 (inquiry)treebuilder_gc_clear, /* tp_clear */
3950 0, /* tp_richcompare */
3951 0, /* tp_weaklistoffset */
3952 0, /* tp_iter */
3953 0, /* tp_iternext */
3954 treebuilder_methods, /* tp_methods */
3955 0, /* tp_members */
3956 0, /* tp_getset */
3957 0, /* tp_base */
3958 0, /* tp_dict */
3959 0, /* tp_descr_get */
3960 0, /* tp_descr_set */
3961 0, /* tp_dictoffset */
3962 _elementtree_TreeBuilder___init__, /* tp_init */
3963 PyType_GenericAlloc, /* tp_alloc */
3964 treebuilder_new, /* tp_new */
3965 0, /* tp_free */
3966};
3967
3968static PyMethodDef xmlparser_methods[] = {
3969 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3970 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3971 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3972 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3973 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3974 {NULL, NULL}
3975};
3976
Neal Norwitz227b5332006-03-22 09:28:35 +00003977static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003978 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003979 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003980 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003981 (destructor)xmlparser_dealloc, /* tp_dealloc */
3982 0, /* tp_print */
3983 0, /* tp_getattr */
3984 0, /* tp_setattr */
3985 0, /* tp_reserved */
3986 0, /* tp_repr */
3987 0, /* tp_as_number */
3988 0, /* tp_as_sequence */
3989 0, /* tp_as_mapping */
3990 0, /* tp_hash */
3991 0, /* tp_call */
3992 0, /* tp_str */
3993 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3994 0, /* tp_setattro */
3995 0, /* tp_as_buffer */
3996 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3997 /* tp_flags */
3998 0, /* tp_doc */
3999 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4000 (inquiry)xmlparser_gc_clear, /* tp_clear */
4001 0, /* tp_richcompare */
4002 0, /* tp_weaklistoffset */
4003 0, /* tp_iter */
4004 0, /* tp_iternext */
4005 xmlparser_methods, /* tp_methods */
4006 0, /* tp_members */
4007 0, /* tp_getset */
4008 0, /* tp_base */
4009 0, /* tp_dict */
4010 0, /* tp_descr_get */
4011 0, /* tp_descr_set */
4012 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004013 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004014 PyType_GenericAlloc, /* tp_alloc */
4015 xmlparser_new, /* tp_new */
4016 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004017};
4018
/* ==================================================================== */
/* python module interface */
4021
4022static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08004023 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004024 {NULL, NULL}
4025};
4026
Martin v. Löwis1a214512008-06-11 05:26:20 +00004027
Eli Bendersky532d03e2013-08-10 08:00:39 -07004028static struct PyModuleDef elementtreemodule = {
4029 PyModuleDef_HEAD_INIT,
4030 "_elementtree",
4031 NULL,
4032 sizeof(elementtreestate),
4033 _functions,
4034 NULL,
4035 elementtree_traverse,
4036 elementtree_clear,
4037 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004038};
4039
Neal Norwitzf6657e62006-12-28 04:47:50 +00004040PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004041PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004042{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004043 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004044 elementtreestate *st;
4045
4046 m = PyState_FindModule(&elementtreemodule);
4047 if (m) {
4048 Py_INCREF(m);
4049 return m;
4050 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004051
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004052 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004053 if (PyType_Ready(&ElementIter_Type) < 0)
4054 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004055 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004056 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004057 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004058 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004059 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004060 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004061
Eli Bendersky532d03e2013-08-10 08:00:39 -07004062 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004063 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004064 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004065 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004066
Eli Bendersky828efde2012-04-05 05:40:58 +03004067 if (!(temp = PyImport_ImportModule("copy")))
4068 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004069 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004070 Py_XDECREF(temp);
4071
Victor Stinnerb136f112017-07-10 22:28:02 +02004072 if (st->deepcopy_obj == NULL) {
4073 return NULL;
4074 }
4075
4076 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004077 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004078 return NULL;
4079
Eli Bendersky20d41742012-06-01 09:48:37 +03004080 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004081 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4082 if (expat_capi) {
4083 /* check that it's usable */
4084 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004085 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004086 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4087 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004088 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004089 PyErr_SetString(PyExc_ImportError,
4090 "pyexpat version is incompatible");
4091 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004092 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004093 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004094 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004095 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004096
Eli Bendersky532d03e2013-08-10 08:00:39 -07004097 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004098 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004099 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004100 Py_INCREF(st->parseerror_obj);
4101 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004102
Eli Bendersky092af1f2012-03-04 07:14:03 +02004103 Py_INCREF((PyObject *)&Element_Type);
4104 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4105
Eli Bendersky58d548d2012-05-29 15:45:16 +03004106 Py_INCREF((PyObject *)&TreeBuilder_Type);
4107 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4108
Eli Bendersky52467b12012-06-01 07:13:08 +03004109 Py_INCREF((PyObject *)&XMLParser_Type);
4110 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004111
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004112 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004113}