blob: 3118b55c874ba549cb20aa34a76c128d834b7a27 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change "#if 0" to "#if 1" to keep a running
   byte count in `memory` and print it on every ALLOC/RELEASE; in the default
   configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   `type`; under MSVC it additionally asks for __inline and __fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The text and tail slots of an element hold tagged pointers: bit 0 of the
   stored value is the 'join' flag and the remaining bits are the object
   pointer.  Every use of text or tail as an object pointer must therefore
   be wrapped in JOIN_OBJ.  See the comments in the ElementObject definition
   for what the flag means.

   JOIN_OBJ(ptr)       - the object pointer with the flag bit masked off
   JOIN_GET(ptr)       - the flag bit (0 or 1)
   JOIN_SET(ptr, flag) - the object pointer of ptr combined with flag */
#define JOIN_OBJ(ptr) ((PyObject*) ((uintptr_t) (ptr) & ~(uintptr_t)1))
#define JOIN_GET(ptr) ((uintptr_t) (ptr) & 1)
#define JOIN_SET(ptr, flag) ((void*) ((uintptr_t) (JOIN_OBJ(ptr)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_Check accepts Element_Type and its subtypes (PyObject_TypeCheck);
   Element_CheckExact accepts Element_Type only. */
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700236dealloc_extra(ElementObjectExtra *extra)
237{
238 Py_ssize_t i;
239
240 if (!extra)
241 return;
242
243 Py_DECREF(extra->attrib);
244
245 for (i = 0; i < extra->length; i++)
246 Py_DECREF(extra->children[i]);
247
248 if (extra->children != extra->_children)
249 PyObject_Free(extra->children);
250
251 PyObject_Free(extra);
252}
253
254LOCAL(void)
255clear_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
Eli Bendersky08b85292012-04-04 15:55:07 +0300257 ElementObjectExtra *myextra;
Eli Bendersky08b85292012-04-04 15:55:07 +0300258
Eli Benderskyebf37a22012-04-03 22:02:37 +0300259 if (!self->extra)
260 return;
261
262 /* Avoid DECREFs calling into this code again (cycles, etc.)
263 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300264 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300265 self->extra = NULL;
266
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700267 dealloc_extra(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268}
269
Eli Bendersky092af1f2012-03-04 07:14:03 +0200270/* Convenience internal function to create new Element objects with the given
271 * tag and attributes.
272*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200274create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275{
276 ElementObject* self;
277
Eli Bendersky0192ba32012-03-30 16:38:33 +0300278 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000279 if (self == NULL)
280 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000281 self->extra = NULL;
282
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000283 Py_INCREF(tag);
284 self->tag = tag;
285
286 Py_INCREF(Py_None);
287 self->text = Py_None;
288
289 Py_INCREF(Py_None);
290 self->tail = Py_None;
291
Eli Benderskyebf37a22012-04-03 22:02:37 +0300292 self->weakreflist = NULL;
293
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200294 ALLOC(sizeof(ElementObject), "create element");
295 PyObject_GC_Track(self);
296
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200297 if (attrib != Py_None && !is_empty_dict(attrib)) {
298 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200300 return NULL;
301 }
302 }
303
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000304 return (PyObject*) self;
305}
306
Eli Bendersky092af1f2012-03-04 07:14:03 +0200307static PyObject *
308element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
309{
310 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
311 if (e != NULL) {
312 Py_INCREF(Py_None);
313 e->tag = Py_None;
314
315 Py_INCREF(Py_None);
316 e->text = Py_None;
317
318 Py_INCREF(Py_None);
319 e->tail = Py_None;
320
321 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300322 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200323 }
324 return (PyObject *)e;
325}
326
Eli Bendersky737b1732012-05-29 06:02:56 +0300327/* Helper function for extracting the attrib dictionary from a keywords dict.
328 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800329 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700331 *
332 * Return a dictionary with the content of kwds merged into the content of
333 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 */
335static PyObject*
336get_attrib_from_keywords(PyObject *kwds)
337{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700338 PyObject *attrib_str = PyUnicode_FromString("attrib");
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700339 if (attrib_str == NULL) {
340 return NULL;
341 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343
344 if (attrib) {
345 /* If attrib was found in kwds, copy its value and remove it from
346 * kwds
347 */
348 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700349 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300350 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
351 Py_TYPE(attrib)->tp_name);
352 return NULL;
353 }
354 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700355 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300356 } else {
357 attrib = PyDict_New();
358 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359
360 Py_DECREF(attrib_str);
361
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700362 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
363 Py_DECREF(attrib);
364 return NULL;
365 }
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 return attrib;
367}
368
Serhiy Storchakacb985562015-05-04 15:32:48 +0300369/*[clinic input]
370module _elementtree
371class _elementtree.Element "ElementObject *" "&Element_Type"
372class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
373class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
374[clinic start generated code]*/
375/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
376
Eli Bendersky092af1f2012-03-04 07:14:03 +0200377static int
378element_init(PyObject *self, PyObject *args, PyObject *kwds)
379{
380 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381 PyObject *attrib = NULL;
382 ElementObject *self_elem;
383
384 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
385 return -1;
386
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (attrib) {
388 /* attrib passed as positional arg */
389 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200390 if (!attrib)
391 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (kwds) {
393 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300395 return -1;
396 }
397 }
398 } else if (kwds) {
399 /* have keywords args */
400 attrib = get_attrib_from_keywords(kwds);
401 if (!attrib)
402 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 }
404
405 self_elem = (ElementObject *)self;
406
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 return -1;
411 }
412 }
413
Eli Bendersky48d358b2012-05-30 17:57:50 +0300414 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416
417 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200418 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300419 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200420
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300422 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200423
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300425 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426
427 return 0;
428}
429
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200431element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200433 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000434 PyObject* *children;
435
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700436 assert(extra >= 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200445 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200456 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
457 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000458 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000459 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100460 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000461 * false alarm always assume at least one child to be safe.
462 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000463 children = PyObject_Realloc(self->extra->children,
464 size * sizeof(PyObject*));
465 if (!children)
466 goto nomemory;
467 } else {
468 children = PyObject_Malloc(size * sizeof(PyObject*));
469 if (!children)
470 goto nomemory;
471 /* copy existing children from static area to malloc buffer */
472 memcpy(children, self->extra->children,
473 self->extra->length * sizeof(PyObject*));
474 }
475 self->extra->children = children;
476 self->extra->allocated = size;
477 }
478
479 return 0;
480
481 nomemory:
482 PyErr_NoMemory();
483 return -1;
484}
485
486LOCAL(int)
487element_add_subelement(ElementObject* self, PyObject* element)
488{
489 /* add a child element to a parent */
490
491 if (element_resize(self, 1) < 0)
492 return -1;
493
494 Py_INCREF(element);
495 self->extra->children[self->extra->length] = element;
496
497 self->extra->length++;
498
499 return 0;
500}
501
502LOCAL(PyObject*)
503element_get_attrib(ElementObject* self)
504{
505 /* return borrowed reference to attrib dictionary */
506 /* note: this function assumes that the extra section exists */
507
508 PyObject* res = self->extra->attrib;
509
510 if (res == Py_None) {
511 /* create missing dictionary */
512 res = PyDict_New();
513 if (!res)
514 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200515 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000516 self->extra->attrib = res;
517 }
518
519 return res;
520}
521
522LOCAL(PyObject*)
523element_get_text(ElementObject* self)
524{
525 /* return borrowed reference to text attribute */
526
Serhiy Storchaka576def02017-03-30 09:47:31 +0300527 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000528
529 if (JOIN_GET(res)) {
530 res = JOIN_OBJ(res);
531 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300532 PyObject *tmp = list_join(res);
533 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000534 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300535 self->text = tmp;
536 Py_DECREF(res);
537 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 }
539 }
540
541 return res;
542}
543
544LOCAL(PyObject*)
545element_get_tail(ElementObject* self)
546{
547 /* return borrowed reference to text attribute */
548
Serhiy Storchaka576def02017-03-30 09:47:31 +0300549 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550
551 if (JOIN_GET(res)) {
552 res = JOIN_OBJ(res);
553 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300554 PyObject *tmp = list_join(res);
555 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000556 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300557 self->tail = tmp;
558 Py_DECREF(res);
559 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000560 }
561 }
562
563 return res;
564}
565
566static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300567subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000568{
569 PyObject* elem;
570
571 ElementObject* parent;
572 PyObject* tag;
573 PyObject* attrib = NULL;
574 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
575 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800576 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800578 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579
Eli Bendersky737b1732012-05-29 06:02:56 +0300580 if (attrib) {
581 /* attrib passed as positional arg */
582 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 if (!attrib)
584 return NULL;
Miss Islington (bot)c46f0422018-10-23 12:45:44 -0700585 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
586 Py_DECREF(attrib);
587 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300588 }
589 } else if (kwds) {
590 /* have keyword args */
591 attrib = get_attrib_from_keywords(kwds);
592 if (!attrib)
593 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300595 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 Py_INCREF(Py_None);
597 attrib = Py_None;
598 }
599
Eli Bendersky092af1f2012-03-04 07:14:03 +0200600 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200602 if (elem == NULL)
603 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000604
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000605 if (element_add_subelement(parent, elem) < 0) {
606 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000607 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000608 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609
610 return elem;
611}
612
Eli Bendersky0192ba32012-03-30 16:38:33 +0300613static int
614element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
615{
616 Py_VISIT(self->tag);
617 Py_VISIT(JOIN_OBJ(self->text));
618 Py_VISIT(JOIN_OBJ(self->tail));
619
620 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200621 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 Py_VISIT(self->extra->attrib);
623
624 for (i = 0; i < self->extra->length; ++i)
625 Py_VISIT(self->extra->children[i]);
626 }
627 return 0;
628}
629
630static int
631element_gc_clear(ElementObject *self)
632{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700634 _clear_joined_ptr(&self->text);
635 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636
637 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300638 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300639 */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700640 clear_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300641 return 0;
642}
643
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644static void
645element_dealloc(ElementObject* self)
646{
INADA Naokia6296d32017-08-24 14:55:17 +0900647 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200649 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300650
651 if (self->weakreflist != NULL)
652 PyObject_ClearWeakRefs((PyObject *) self);
653
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 /* element_gc_clear clears all references and deallocates extra
655 */
656 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000657
658 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200659 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200660 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000661}
662
663/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000664
Serhiy Storchakacb985562015-05-04 15:32:48 +0300665/*[clinic input]
666_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668 subelement: object(subclass_of='&Element_Type')
669 /
670
671[clinic start generated code]*/
672
673static PyObject *
674_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
675/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
676{
677 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000678 return NULL;
679
680 Py_RETURN_NONE;
681}
682
Serhiy Storchakacb985562015-05-04 15:32:48 +0300683/*[clinic input]
684_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000685
Serhiy Storchakacb985562015-05-04 15:32:48 +0300686[clinic start generated code]*/
687
688static PyObject *
689_elementtree_Element_clear_impl(ElementObject *self)
690/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
691{
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700692 clear_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693
694 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300695 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696
697 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300698 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 Py_RETURN_NONE;
701}
702
Serhiy Storchakacb985562015-05-04 15:32:48 +0300703/*[clinic input]
704_elementtree.Element.__copy__
705
706[clinic start generated code]*/
707
708static PyObject *
709_elementtree_Element___copy___impl(ElementObject *self)
710/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200712 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713 ElementObject* element;
714
Eli Bendersky092af1f2012-03-04 07:14:03 +0200715 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800716 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717 if (!element)
718 return NULL;
719
Oren Milman39ecb9c2017-10-10 23:26:24 +0300720 Py_INCREF(JOIN_OBJ(self->text));
721 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722
Oren Milman39ecb9c2017-10-10 23:26:24 +0300723 Py_INCREF(JOIN_OBJ(self->tail));
724 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700726 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000727 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000728 if (element_resize(element, self->extra->length) < 0) {
729 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000731 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000732
733 for (i = 0; i < self->extra->length; i++) {
734 Py_INCREF(self->extra->children[i]);
735 element->extra->children[i] = self->extra->children[i];
736 }
737
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700738 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000740 }
741
742 return (PyObject*) element;
743}
744
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200745/* Helper for a deep copy. */
746LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
747
Serhiy Storchakacb985562015-05-04 15:32:48 +0300748/*[clinic input]
749_elementtree.Element.__deepcopy__
750
Oren Milmand0568182017-09-12 17:39:15 +0300751 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300752 /
753
754[clinic start generated code]*/
755
756static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300757_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
758/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000759{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200760 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000761 ElementObject* element;
762 PyObject* tag;
763 PyObject* attrib;
764 PyObject* text;
765 PyObject* tail;
766 PyObject* id;
767
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000768 tag = deepcopy(self->tag, memo);
769 if (!tag)
770 return NULL;
771
772 if (self->extra) {
773 attrib = deepcopy(self->extra->attrib, memo);
774 if (!attrib) {
775 Py_DECREF(tag);
776 return NULL;
777 }
778 } else {
779 Py_INCREF(Py_None);
780 attrib = Py_None;
781 }
782
Eli Bendersky092af1f2012-03-04 07:14:03 +0200783 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784
785 Py_DECREF(tag);
786 Py_DECREF(attrib);
787
788 if (!element)
789 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000791 text = deepcopy(JOIN_OBJ(self->text), memo);
792 if (!text)
793 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300794 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000795
796 tail = deepcopy(JOIN_OBJ(self->tail), memo);
797 if (!tail)
798 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300799 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700801 assert(!element->extra || !element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803 if (element_resize(element, self->extra->length) < 0)
804 goto error;
805
806 for (i = 0; i < self->extra->length; i++) {
807 PyObject* child = deepcopy(self->extra->children[i], memo);
808 if (!child) {
809 element->extra->length = i;
810 goto error;
811 }
812 element->extra->children[i] = child;
813 }
814
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700815 assert(!element->extra->length);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000816 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000817 }
818
819 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700820 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000821 if (!id)
822 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000823
824 i = PyDict_SetItem(memo, id, (PyObject*) element);
825
826 Py_DECREF(id);
827
828 if (i < 0)
829 goto error;
830
831 return (PyObject*) element;
832
833 error:
834 Py_DECREF(element);
835 return NULL;
836}
837
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200838LOCAL(PyObject *)
839deepcopy(PyObject *object, PyObject *memo)
840{
841 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200842 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200843 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200844
845 /* Fast paths */
846 if (object == Py_None || PyUnicode_CheckExact(object)) {
847 Py_INCREF(object);
848 return object;
849 }
850
851 if (Py_REFCNT(object) == 1) {
852 if (PyDict_CheckExact(object)) {
853 PyObject *key, *value;
854 Py_ssize_t pos = 0;
855 int simple = 1;
856 while (PyDict_Next(object, &pos, &key, &value)) {
857 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
858 simple = 0;
859 break;
860 }
861 }
862 if (simple)
863 return PyDict_Copy(object);
864 /* Fall through to general case */
865 }
866 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300867 return _elementtree_Element___deepcopy___impl(
868 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200869 }
870 }
871
872 /* General case */
873 st = ET_STATE_GLOBAL;
874 if (!st->deepcopy_obj) {
875 PyErr_SetString(PyExc_RuntimeError,
876 "deepcopy helper not found");
877 return NULL;
878 }
879
Victor Stinner7fbac452016-08-20 01:34:44 +0200880 stack[0] = object;
881 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200882 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200883}
884
885
Serhiy Storchakacb985562015-05-04 15:32:48 +0300886/*[clinic input]
887_elementtree.Element.__sizeof__ -> Py_ssize_t
888
889[clinic start generated code]*/
890
891static Py_ssize_t
892_elementtree_Element___sizeof___impl(ElementObject *self)
893/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200894{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200895 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200896 if (self->extra) {
897 result += sizeof(ElementObjectExtra);
898 if (self->extra->children != self->extra->_children)
899 result += sizeof(PyObject*) * self->extra->allocated;
900 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300901 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200902}
903
Eli Bendersky698bdb22013-01-10 06:01:06 -0800904/* dict keys for getstate/setstate. */
905#define PICKLED_TAG "tag"
906#define PICKLED_CHILDREN "_children"
907#define PICKLED_ATTRIB "attrib"
908#define PICKLED_TAIL "tail"
909#define PICKLED_TEXT "text"
910
911/* __getstate__ returns a fabricated instance dict as in the pure-Python
912 * Element implementation, for interoperability/interchangeability. This
913 * makes the pure-Python implementation details an API, but (a) there aren't
914 * any unnecessary structures there; and (b) it buys compatibility with 3.2
915 * pickles. See issue #16076.
916 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300917/*[clinic input]
918_elementtree.Element.__getstate__
919
920[clinic start generated code]*/
921
Eli Bendersky698bdb22013-01-10 06:01:06 -0800922static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300923_elementtree_Element___getstate___impl(ElementObject *self)
924/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800925{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200926 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800927 PyObject *instancedict = NULL, *children;
928
929 /* Build a list of children. */
930 children = PyList_New(self->extra ? self->extra->length : 0);
931 if (!children)
932 return NULL;
933 for (i = 0; i < PyList_GET_SIZE(children); i++) {
934 PyObject *child = self->extra->children[i];
935 Py_INCREF(child);
936 PyList_SET_ITEM(children, i, child);
937 }
938
939 /* Construct the state object. */
940 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
941 if (noattrib)
942 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
943 PICKLED_TAG, self->tag,
944 PICKLED_CHILDREN, children,
945 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700946 PICKLED_TEXT, JOIN_OBJ(self->text),
947 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800948 else
949 instancedict = Py_BuildValue("{sOsOsOsOsO}",
950 PICKLED_TAG, self->tag,
951 PICKLED_CHILDREN, children,
952 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700953 PICKLED_TEXT, JOIN_OBJ(self->text),
954 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800955 if (instancedict) {
956 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800957 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800958 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800959 else {
960 for (i = 0; i < PyList_GET_SIZE(children); i++)
961 Py_DECREF(PyList_GET_ITEM(children, i));
962 Py_DECREF(children);
963
964 return NULL;
965 }
966}
967
968static PyObject *
969element_setstate_from_attributes(ElementObject *self,
970 PyObject *tag,
971 PyObject *attrib,
972 PyObject *text,
973 PyObject *tail,
974 PyObject *children)
975{
976 Py_ssize_t i, nchildren;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700977 ElementObjectExtra *oldextra = NULL;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978
979 if (!tag) {
980 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
981 return NULL;
982 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800983
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200984 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300985 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986
Oren Milman39ecb9c2017-10-10 23:26:24 +0300987 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
988 Py_INCREF(JOIN_OBJ(text));
989 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800990
Oren Milman39ecb9c2017-10-10 23:26:24 +0300991 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
992 Py_INCREF(JOIN_OBJ(tail));
993 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800994
995 /* Handle ATTRIB and CHILDREN. */
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700996 if (!children && !attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -0800997 Py_RETURN_NONE;
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -0700998 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800999
1000 /* Compute 'nchildren'. */
1001 if (children) {
1002 if (!PyList_Check(children)) {
1003 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1004 return NULL;
1005 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001006 nchildren = PyList_GET_SIZE(children);
1007
1008 /* (Re-)allocate 'extra'.
1009 Avoid DECREFs calling into this code again (cycles, etc.)
1010 */
1011 oldextra = self->extra;
1012 self->extra = NULL;
1013 if (element_resize(self, nchildren)) {
1014 assert(!self->extra || !self->extra->length);
1015 clear_extra(self);
1016 self->extra = oldextra;
1017 return NULL;
1018 }
1019 assert(self->extra);
1020 assert(self->extra->allocated >= nchildren);
1021 if (oldextra) {
1022 assert(self->extra->attrib == Py_None);
1023 self->extra->attrib = oldextra->attrib;
1024 oldextra->attrib = Py_None;
1025 }
1026
1027 /* Copy children */
1028 for (i = 0; i < nchildren; i++) {
1029 self->extra->children[i] = PyList_GET_ITEM(children, i);
1030 Py_INCREF(self->extra->children[i]);
1031 }
1032
1033 assert(!self->extra->length);
1034 self->extra->length = nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001035 }
1036 else {
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001037 if (element_resize(self, 0)) {
1038 return NULL;
1039 }
Eli Bendersky698bdb22013-01-10 06:01:06 -08001040 }
1041
Eli Bendersky698bdb22013-01-10 06:01:06 -08001042 /* Stash attrib. */
1043 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001045 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001046 }
Miss Islington (bot)5b9b9352018-10-18 00:17:15 -07001047 dealloc_extra(oldextra);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001048
1049 Py_RETURN_NONE;
1050}
1051
1052/* __setstate__ for Element instance from the Python implementation.
1053 * 'state' should be the instance dict.
1054 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055
Eli Bendersky698bdb22013-01-10 06:01:06 -08001056static PyObject *
1057element_setstate_from_Python(ElementObject *self, PyObject *state)
1058{
1059 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1060 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1061 PyObject *args;
1062 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001063 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001064
Eli Bendersky698bdb22013-01-10 06:01:06 -08001065 tag = attrib = text = tail = children = NULL;
1066 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001067 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001068 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001069
1070 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1071 &attrib, &text, &tail, &children))
1072 retval = element_setstate_from_attributes(self, tag, attrib, text,
1073 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001074 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001075 retval = NULL;
1076
1077 Py_DECREF(args);
1078 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001079}
1080
Serhiy Storchakacb985562015-05-04 15:32:48 +03001081/*[clinic input]
1082_elementtree.Element.__setstate__
1083
1084 state: object
1085 /
1086
1087[clinic start generated code]*/
1088
Eli Bendersky698bdb22013-01-10 06:01:06 -08001089static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001090_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1091/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001092{
1093 if (!PyDict_CheckExact(state)) {
1094 PyErr_Format(PyExc_TypeError,
1095 "Don't know how to unpickle \"%.200R\" as an Element",
1096 state);
1097 return NULL;
1098 }
1099 else
1100 return element_setstate_from_Python(self, state);
1101}
1102
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001103LOCAL(int)
1104checkpath(PyObject* tag)
1105{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001106 Py_ssize_t i;
1107 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001108
1109 /* check if a tag contains an xpath character */
1110
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001111#define PATHCHAR(ch) \
1112 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001114 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001115 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1116 void *data = PyUnicode_DATA(tag);
1117 unsigned int kind = PyUnicode_KIND(tag);
1118 for (i = 0; i < len; i++) {
1119 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1120 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001121 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001123 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001124 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 return 1;
1126 }
1127 return 0;
1128 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001129 if (PyBytes_Check(tag)) {
1130 char *p = PyBytes_AS_STRING(tag);
1131 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001132 if (p[i] == '{')
1133 check = 0;
1134 else if (p[i] == '}')
1135 check = 1;
1136 else if (check && PATHCHAR(p[i]))
1137 return 1;
1138 }
1139 return 0;
1140 }
1141
1142 return 1; /* unknown type; might be path expression */
1143}
1144
Serhiy Storchakacb985562015-05-04 15:32:48 +03001145/*[clinic input]
1146_elementtree.Element.extend
1147
1148 elements: object
1149 /
1150
1151[clinic start generated code]*/
1152
1153static PyObject *
1154_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1155/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001156{
1157 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001158 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001161 if (!seq) {
1162 PyErr_Format(
1163 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001164 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001165 );
1166 return NULL;
1167 }
1168
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001170 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001171 Py_INCREF(element);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001172 if (!Element_Check(element)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001173 PyErr_Format(
1174 PyExc_TypeError,
1175 "expected an Element, not \"%.200s\"",
1176 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001177 Py_DECREF(seq);
1178 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001179 return NULL;
1180 }
1181
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001182 if (element_add_subelement(self, element) < 0) {
1183 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001184 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001185 return NULL;
1186 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001187 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001188 }
1189
1190 Py_DECREF(seq);
1191
1192 Py_RETURN_NONE;
1193}
1194
Serhiy Storchakacb985562015-05-04 15:32:48 +03001195/*[clinic input]
1196_elementtree.Element.find
1197
1198 path: object
1199 namespaces: object = None
1200
1201[clinic start generated code]*/
1202
1203static PyObject *
1204_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1205 PyObject *namespaces)
1206/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001207{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001208 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001209 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001210
Serhiy Storchakacb985562015-05-04 15:32:48 +03001211 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001212 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001213 return _PyObject_CallMethodIdObjArgs(
1214 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001216 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217
1218 if (!self->extra)
1219 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001220
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 for (i = 0; i < self->extra->length; i++) {
1222 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001224 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001225 continue;
1226 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001227 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001228 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001230 Py_DECREF(item);
1231 if (rc < 0)
1232 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001233 }
1234
1235 Py_RETURN_NONE;
1236}
1237
Serhiy Storchakacb985562015-05-04 15:32:48 +03001238/*[clinic input]
1239_elementtree.Element.findtext
1240
1241 path: object
1242 default: object = None
1243 namespaces: object = None
1244
1245[clinic start generated code]*/
1246
1247static PyObject *
1248_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1249 PyObject *default_value,
1250 PyObject *namespaces)
1251/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001253 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001254 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001255 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001256
Serhiy Storchakacb985562015-05-04 15:32:48 +03001257 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001258 return _PyObject_CallMethodIdObjArgs(
1259 st->elementpath_obj, &PyId_findtext,
1260 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001261 );
1262
1263 if (!self->extra) {
1264 Py_INCREF(default_value);
1265 return default_value;
1266 }
1267
1268 for (i = 0; i < self->extra->length; i++) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001269 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001270 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001271 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001272 continue;
1273 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001274 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001275 if (rc > 0) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001276 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001277 if (text == Py_None) {
1278 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001279 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001280 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001281 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001282 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001283 return text;
1284 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001285 Py_DECREF(item);
1286 if (rc < 0)
1287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001288 }
1289
1290 Py_INCREF(default_value);
1291 return default_value;
1292}
1293
Serhiy Storchakacb985562015-05-04 15:32:48 +03001294/*[clinic input]
1295_elementtree.Element.findall
1296
1297 path: object
1298 namespaces: object = None
1299
1300[clinic start generated code]*/
1301
1302static PyObject *
1303_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1304 PyObject *namespaces)
1305/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001306{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001307 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001308 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001309 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001310
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001311 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001312 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001313 return _PyObject_CallMethodIdObjArgs(
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001314 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001316 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317
1318 out = PyList_New(0);
1319 if (!out)
1320 return NULL;
1321
1322 if (!self->extra)
1323 return out;
1324
1325 for (i = 0; i < self->extra->length; i++) {
1326 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001327 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001328 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001329 continue;
1330 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001331 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001332 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1333 Py_DECREF(item);
1334 Py_DECREF(out);
1335 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001336 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001337 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338 }
1339
1340 return out;
1341}
1342
Serhiy Storchakacb985562015-05-04 15:32:48 +03001343/*[clinic input]
1344_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001345
Serhiy Storchakacb985562015-05-04 15:32:48 +03001346 path: object
1347 namespaces: object = None
1348
1349[clinic start generated code]*/
1350
1351static PyObject *
1352_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1353 PyObject *namespaces)
1354/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1355{
1356 PyObject* tag = path;
1357 _Py_IDENTIFIER(iterfind);
1358 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001359
Victor Stinnerf5616342016-12-09 15:26:00 +01001360 return _PyObject_CallMethodIdObjArgs(
1361 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001362}
1363
Serhiy Storchakacb985562015-05-04 15:32:48 +03001364/*[clinic input]
1365_elementtree.Element.get
1366
1367 key: object
1368 default: object = None
1369
1370[clinic start generated code]*/
1371
1372static PyObject *
1373_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1374 PyObject *default_value)
1375/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001376{
1377 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001378
1379 if (!self->extra || self->extra->attrib == Py_None)
1380 value = default_value;
1381 else {
1382 value = PyDict_GetItem(self->extra->attrib, key);
1383 if (!value)
1384 value = default_value;
1385 }
1386
1387 Py_INCREF(value);
1388 return value;
1389}
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.getchildren
1393
1394[clinic start generated code]*/
1395
1396static PyObject *
1397_elementtree_Element_getchildren_impl(ElementObject *self)
1398/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001400 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401 PyObject* list;
1402
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001403 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1404 "This method will be removed in future versions. "
1405 "Use 'list(elem)' or iteration over elem instead.",
1406 1) < 0) {
1407 return NULL;
1408 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001409
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001410 if (!self->extra)
1411 return PyList_New(0);
1412
1413 list = PyList_New(self->extra->length);
1414 if (!list)
1415 return NULL;
1416
1417 for (i = 0; i < self->extra->length; i++) {
1418 PyObject* item = self->extra->children[i];
1419 Py_INCREF(item);
1420 PyList_SET_ITEM(list, i, item);
1421 }
1422
1423 return list;
1424}
1425
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001426
Eli Bendersky64d11e62012-06-15 07:42:50 +03001427static PyObject *
1428create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1429
1430
Serhiy Storchakacb985562015-05-04 15:32:48 +03001431/*[clinic input]
1432_elementtree.Element.iter
1433
1434 tag: object = None
1435
1436[clinic start generated code]*/
1437
Eli Bendersky64d11e62012-06-15 07:42:50 +03001438static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001439_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1440/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001441{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001442 if (PyUnicode_Check(tag)) {
1443 if (PyUnicode_READY(tag) < 0)
1444 return NULL;
1445 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1446 tag = Py_None;
1447 }
1448 else if (PyBytes_Check(tag)) {
1449 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1450 tag = Py_None;
1451 }
1452
Eli Bendersky64d11e62012-06-15 07:42:50 +03001453 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001454}
1455
1456
Serhiy Storchakacb985562015-05-04 15:32:48 +03001457/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001458_elementtree.Element.getiterator
1459
1460 tag: object = None
1461
1462[clinic start generated code]*/
1463
1464static PyObject *
1465_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1466/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1467{
1468 /* Change for a DeprecationWarning in 1.4 */
1469 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1470 "This method will be removed in future versions. "
1471 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1472 1) < 0) {
1473 return NULL;
1474 }
1475 return _elementtree_Element_iter_impl(self, tag);
1476}
1477
1478
1479/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001480_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482[clinic start generated code]*/
1483
1484static PyObject *
1485_elementtree_Element_itertext_impl(ElementObject *self)
1486/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1487{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001488 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489}
1490
Eli Bendersky64d11e62012-06-15 07:42:50 +03001491
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001493element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001495 ElementObject* self = (ElementObject*) self_;
1496
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497 if (!self->extra || index < 0 || index >= self->extra->length) {
1498 PyErr_SetString(
1499 PyExc_IndexError,
1500 "child index out of range"
1501 );
1502 return NULL;
1503 }
1504
1505 Py_INCREF(self->extra->children[index]);
1506 return self->extra->children[index];
1507}
1508
Serhiy Storchakacb985562015-05-04 15:32:48 +03001509/*[clinic input]
1510_elementtree.Element.insert
1511
1512 index: Py_ssize_t
1513 subelement: object(subclass_of='&Element_Type')
1514 /
1515
1516[clinic start generated code]*/
1517
1518static PyObject *
1519_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1520 PyObject *subelement)
1521/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001523 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524
Victor Stinner5f0af232013-07-11 23:01:36 +02001525 if (!self->extra) {
1526 if (create_extra(self, NULL) < 0)
1527 return NULL;
1528 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001530 if (index < 0) {
1531 index += self->extra->length;
1532 if (index < 0)
1533 index = 0;
1534 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535 if (index > self->extra->length)
1536 index = self->extra->length;
1537
1538 if (element_resize(self, 1) < 0)
1539 return NULL;
1540
1541 for (i = self->extra->length; i > index; i--)
1542 self->extra->children[i] = self->extra->children[i-1];
1543
Serhiy Storchakacb985562015-05-04 15:32:48 +03001544 Py_INCREF(subelement);
1545 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546
1547 self->extra->length++;
1548
1549 Py_RETURN_NONE;
1550}
1551
Serhiy Storchakacb985562015-05-04 15:32:48 +03001552/*[clinic input]
1553_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001554
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555[clinic start generated code]*/
1556
1557static PyObject *
1558_elementtree_Element_items_impl(ElementObject *self)
1559/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1560{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561 if (!self->extra || self->extra->attrib == Py_None)
1562 return PyList_New(0);
1563
1564 return PyDict_Items(self->extra->attrib);
1565}
1566
Serhiy Storchakacb985562015-05-04 15:32:48 +03001567/*[clinic input]
1568_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569
Serhiy Storchakacb985562015-05-04 15:32:48 +03001570[clinic start generated code]*/
1571
1572static PyObject *
1573_elementtree_Element_keys_impl(ElementObject *self)
1574/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1575{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001576 if (!self->extra || self->extra->attrib == Py_None)
1577 return PyList_New(0);
1578
1579 return PyDict_Keys(self->extra->attrib);
1580}
1581
Martin v. Löwis18e16552006-02-15 17:27:45 +00001582static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583element_length(ElementObject* self)
1584{
1585 if (!self->extra)
1586 return 0;
1587
1588 return self->extra->length;
1589}
1590
Serhiy Storchakacb985562015-05-04 15:32:48 +03001591/*[clinic input]
1592_elementtree.Element.makeelement
1593
1594 tag: object
1595 attrib: object
1596 /
1597
1598[clinic start generated code]*/
1599
1600static PyObject *
1601_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1602 PyObject *attrib)
1603/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604{
1605 PyObject* elem;
1606
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 attrib = PyDict_Copy(attrib);
1608 if (!attrib)
1609 return NULL;
1610
Eli Bendersky092af1f2012-03-04 07:14:03 +02001611 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612
1613 Py_DECREF(attrib);
1614
1615 return elem;
1616}
1617
Serhiy Storchakacb985562015-05-04 15:32:48 +03001618/*[clinic input]
1619_elementtree.Element.remove
1620
1621 subelement: object(subclass_of='&Element_Type')
1622 /
1623
1624[clinic start generated code]*/
1625
1626static PyObject *
1627_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1628/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001630 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001631 int rc;
1632 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634 if (!self->extra) {
1635 /* element has no children, so raise exception */
1636 PyErr_SetString(
1637 PyExc_ValueError,
1638 "list.remove(x): x not in list"
1639 );
1640 return NULL;
1641 }
1642
1643 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001644 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001646 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001647 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001648 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001649 if (rc < 0)
1650 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001651 }
1652
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001653 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001654 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001655 PyErr_SetString(
1656 PyExc_ValueError,
1657 "list.remove(x): x not in list"
1658 );
1659 return NULL;
1660 }
1661
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001662 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001663
1664 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665 for (; i < self->extra->length; i++)
1666 self->extra->children[i] = self->extra->children[i+1];
1667
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001668 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669 Py_RETURN_NONE;
1670}
1671
1672static PyObject*
1673element_repr(ElementObject* self)
1674{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001675 int status;
1676
1677 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001678 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001679
1680 status = Py_ReprEnter((PyObject *)self);
1681 if (status == 0) {
1682 PyObject *res;
1683 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1684 Py_ReprLeave((PyObject *)self);
1685 return res;
1686 }
1687 if (status > 0)
1688 PyErr_Format(PyExc_RuntimeError,
1689 "reentrant call inside %s.__repr__",
1690 Py_TYPE(self)->tp_name);
1691 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692}
1693
Serhiy Storchakacb985562015-05-04 15:32:48 +03001694/*[clinic input]
1695_elementtree.Element.set
1696
1697 key: object
1698 value: object
1699 /
1700
1701[clinic start generated code]*/
1702
1703static PyObject *
1704_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1705 PyObject *value)
1706/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001707{
1708 PyObject* attrib;
1709
Victor Stinner5f0af232013-07-11 23:01:36 +02001710 if (!self->extra) {
1711 if (create_extra(self, NULL) < 0)
1712 return NULL;
1713 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001714
1715 attrib = element_get_attrib(self);
1716 if (!attrib)
1717 return NULL;
1718
1719 if (PyDict_SetItem(attrib, key, value) < 0)
1720 return NULL;
1721
1722 Py_RETURN_NONE;
1723}
1724
1725static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001726element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001727{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001728 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001729 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001730 PyObject* old;
1731
1732 if (!self->extra || index < 0 || index >= self->extra->length) {
1733 PyErr_SetString(
1734 PyExc_IndexError,
1735 "child assignment index out of range");
1736 return -1;
1737 }
1738
1739 old = self->extra->children[index];
1740
1741 if (item) {
1742 Py_INCREF(item);
1743 self->extra->children[index] = item;
1744 } else {
1745 self->extra->length--;
1746 for (i = index; i < self->extra->length; i++)
1747 self->extra->children[i] = self->extra->children[i+1];
1748 }
1749
1750 Py_DECREF(old);
1751
1752 return 0;
1753}
1754
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001755static PyObject*
1756element_subscr(PyObject* self_, PyObject* item)
1757{
1758 ElementObject* self = (ElementObject*) self_;
1759
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001760 if (PyIndex_Check(item)) {
1761 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001762
1763 if (i == -1 && PyErr_Occurred()) {
1764 return NULL;
1765 }
1766 if (i < 0 && self->extra)
1767 i += self->extra->length;
1768 return element_getitem(self_, i);
1769 }
1770 else if (PySlice_Check(item)) {
1771 Py_ssize_t start, stop, step, slicelen, cur, i;
1772 PyObject* list;
1773
1774 if (!self->extra)
1775 return PyList_New(0);
1776
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001777 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001778 return NULL;
1779 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001780 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1781 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782
1783 if (slicelen <= 0)
1784 return PyList_New(0);
1785 else {
1786 list = PyList_New(slicelen);
1787 if (!list)
1788 return NULL;
1789
1790 for (cur = start, i = 0; i < slicelen;
1791 cur += step, i++) {
1792 PyObject* item = self->extra->children[cur];
1793 Py_INCREF(item);
1794 PyList_SET_ITEM(list, i, item);
1795 }
1796
1797 return list;
1798 }
1799 }
1800 else {
1801 PyErr_SetString(PyExc_TypeError,
1802 "element indices must be integers");
1803 return NULL;
1804 }
1805}
1806
1807static int
1808element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1809{
1810 ElementObject* self = (ElementObject*) self_;
1811
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001812 if (PyIndex_Check(item)) {
1813 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001814
1815 if (i == -1 && PyErr_Occurred()) {
1816 return -1;
1817 }
1818 if (i < 0 && self->extra)
1819 i += self->extra->length;
1820 return element_setitem(self_, i, value);
1821 }
1822 else if (PySlice_Check(item)) {
1823 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1824
1825 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001826 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001827
Victor Stinner5f0af232013-07-11 23:01:36 +02001828 if (!self->extra) {
1829 if (create_extra(self, NULL) < 0)
1830 return -1;
1831 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001832
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001833 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001834 return -1;
1835 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001836 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1837 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001838
Eli Bendersky865756a2012-03-09 13:38:15 +02001839 if (value == NULL) {
1840 /* Delete slice */
1841 size_t cur;
1842 Py_ssize_t i;
1843
1844 if (slicelen <= 0)
1845 return 0;
1846
1847 /* Since we're deleting, the direction of the range doesn't matter,
1848 * so for simplicity make it always ascending.
1849 */
1850 if (step < 0) {
1851 stop = start + 1;
1852 start = stop + step * (slicelen - 1) - 1;
1853 step = -step;
1854 }
1855
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001856 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001857
1858 /* recycle is a list that will contain all the children
1859 * scheduled for removal.
1860 */
1861 if (!(recycle = PyList_New(slicelen))) {
Eli Bendersky865756a2012-03-09 13:38:15 +02001862 return -1;
1863 }
1864
1865 /* This loop walks over all the children that have to be deleted,
1866 * with cur pointing at them. num_moved is the amount of children
1867 * until the next deleted child that have to be "shifted down" to
1868 * occupy the deleted's places.
1869 * Note that in the ith iteration, shifting is done i+i places down
1870 * because i children were already removed.
1871 */
1872 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1873 /* Compute how many children have to be moved, clipping at the
1874 * list end.
1875 */
1876 Py_ssize_t num_moved = step - 1;
1877 if (cur + step >= (size_t)self->extra->length) {
1878 num_moved = self->extra->length - cur - 1;
1879 }
1880
1881 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1882
1883 memmove(
1884 self->extra->children + cur - i,
1885 self->extra->children + cur + 1,
1886 num_moved * sizeof(PyObject *));
1887 }
1888
1889 /* Leftover "tail" after the last removed child */
1890 cur = start + (size_t)slicelen * step;
1891 if (cur < (size_t)self->extra->length) {
1892 memmove(
1893 self->extra->children + cur - slicelen,
1894 self->extra->children + cur,
1895 (self->extra->length - cur) * sizeof(PyObject *));
1896 }
1897
1898 self->extra->length -= slicelen;
1899
1900 /* Discard the recycle list with all the deleted sub-elements */
Miss Islington (bot)c46f0422018-10-23 12:45:44 -07001901 Py_DECREF(recycle);
Eli Bendersky865756a2012-03-09 13:38:15 +02001902 return 0;
1903 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001904
1905 /* A new slice is actually being assigned */
1906 seq = PySequence_Fast(value, "");
1907 if (!seq) {
1908 PyErr_Format(
1909 PyExc_TypeError,
1910 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1911 );
1912 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001913 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001914 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001915
1916 if (step != 1 && newlen != slicelen)
1917 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001918 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001920 "attempt to assign sequence of size %zd "
1921 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001922 newlen, slicelen
1923 );
1924 return -1;
1925 }
1926
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001927 /* Resize before creating the recycle bin, to prevent refleaks. */
1928 if (newlen > slicelen) {
1929 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001930 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001931 return -1;
1932 }
1933 }
1934
1935 if (slicelen > 0) {
1936 /* to avoid recursive calls to this method (via decref), move
1937 old items to the recycle bin here, and get rid of them when
1938 we're done modifying the element */
1939 recycle = PyList_New(slicelen);
1940 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001941 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001942 return -1;
1943 }
1944 for (cur = start, i = 0; i < slicelen;
1945 cur += step, i++)
1946 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1947 }
1948
1949 if (newlen < slicelen) {
1950 /* delete slice */
1951 for (i = stop; i < self->extra->length; i++)
1952 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1953 } else if (newlen > slicelen) {
1954 /* insert slice */
1955 for (i = self->extra->length-1; i >= stop; i--)
1956 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1957 }
1958
1959 /* replace the slice */
1960 for (cur = start, i = 0; i < newlen;
1961 cur += step, i++) {
1962 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1963 Py_INCREF(element);
1964 self->extra->children[cur] = element;
1965 }
1966
1967 self->extra->length += newlen - slicelen;
1968
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001969 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001970
1971 /* discard the recycle bin, and everything in it */
1972 Py_XDECREF(recycle);
1973
1974 return 0;
1975 }
1976 else {
1977 PyErr_SetString(PyExc_TypeError,
1978 "element indices must be integers");
1979 return -1;
1980 }
1981}
1982
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001983static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001984element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001985{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001986 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001987 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001988 return res;
1989}
1990
Serhiy Storchakadde08152015-11-25 15:28:13 +02001991static PyObject*
1992element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001993{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001994 PyObject *res = element_get_text(self);
1995 Py_XINCREF(res);
1996 return res;
1997}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001998
Serhiy Storchakadde08152015-11-25 15:28:13 +02001999static PyObject*
2000element_tail_getter(ElementObject *self, void *closure)
2001{
2002 PyObject *res = element_get_tail(self);
2003 Py_XINCREF(res);
2004 return res;
2005}
2006
2007static PyObject*
2008element_attrib_getter(ElementObject *self, void *closure)
2009{
2010 PyObject *res;
2011 if (!self->extra) {
2012 if (create_extra(self, NULL) < 0)
2013 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02002014 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02002015 res = element_get_attrib(self);
2016 Py_XINCREF(res);
2017 return res;
2018}
Victor Stinner4d463432013-07-11 23:05:03 +02002019
/* Setter validation: attribute deletion (a NULL value) is not supported.
   When V is NULL the macro sets AttributeError and makes the *enclosing
   function* return -1.  It is wrapped in do { } while (0) so that it expands
   to exactly one statement and must be followed by a semicolon, which keeps
   it safe inside unbraced if/else bodies. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
2028
Serhiy Storchakadde08152015-11-25 15:28:13 +02002029static int
2030element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2031{
2032 _VALIDATE_ATTR_VALUE(value);
2033 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002034 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002035 return 0;
2036}
2037
2038static int
2039element_text_setter(ElementObject *self, PyObject *value, void *closure)
2040{
2041 _VALIDATE_ATTR_VALUE(value);
2042 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002043 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002044 return 0;
2045}
2046
2047static int
2048element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2049{
2050 _VALIDATE_ATTR_VALUE(value);
2051 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002052 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002053 return 0;
2054}
2055
2056static int
2057element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2058{
2059 _VALIDATE_ATTR_VALUE(value);
2060 if (!self->extra) {
2061 if (create_extra(self, NULL) < 0)
2062 return -1;
2063 }
2064 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002065 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002066 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002067}
2068
2069static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002070 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002071 0, /* sq_concat */
2072 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002073 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002074 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002075 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002076 0,
2077};
2078
Eli Bendersky64d11e62012-06-15 07:42:50 +03002079/******************************* Element iterator ****************************/
2080
2081/* ElementIterObject represents the iteration state over an XML element in
2082 * pre-order traversal. To keep track of which sub-element should be returned
2083 * next, a stack of parents is maintained. This is a standard stack-based
2084 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002085 * The stack is managed using a continuous array.
2086 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002087 * the current one is exhausted, and the next child to examine in that parent.
2088 */
2089typedef struct ParentLocator_t {
2090 ElementObject *parent;
2091 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002092} ParentLocator;
2093
2094typedef struct {
2095 PyObject_HEAD
2096 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002097 Py_ssize_t parent_stack_used;
2098 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099 ElementObject *root_element;
2100 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101 int gettext;
2102} ElementIterObject;
2103
2104
2105static void
2106elementiter_dealloc(ElementIterObject *it)
2107{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002108 Py_ssize_t i = it->parent_stack_used;
2109 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002110 /* bpo-31095: UnTrack is needed before calling any callbacks */
2111 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002112 while (i--)
2113 Py_XDECREF(it->parent_stack[i].parent);
2114 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115
2116 Py_XDECREF(it->sought_tag);
2117 Py_XDECREF(it->root_element);
2118
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119 PyObject_GC_Del(it);
2120}
2121
2122static int
2123elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2124{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002125 Py_ssize_t i = it->parent_stack_used;
2126 while (i--)
2127 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002128
2129 Py_VISIT(it->root_element);
2130 Py_VISIT(it->sought_tag);
2131 return 0;
2132}
2133
2134/* Helper function for elementiter_next. Add a new parent to the parent stack.
2135 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002136static int
2137parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002138{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002139 ParentLocator *item;
2140
2141 if (it->parent_stack_used >= it->parent_stack_size) {
2142 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2143 ParentLocator *parent_stack = it->parent_stack;
2144 PyMem_Resize(parent_stack, ParentLocator, new_size);
2145 if (parent_stack == NULL)
2146 return -1;
2147 it->parent_stack = parent_stack;
2148 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002150 item = it->parent_stack + it->parent_stack_used++;
2151 Py_INCREF(parent);
2152 item->parent = parent;
2153 item->child_index = 0;
2154 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002155}
2156
2157static PyObject *
2158elementiter_next(ElementIterObject *it)
2159{
2160 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002161 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002162 * A short note on gettext: this function serves both the iter() and
2163 * itertext() methods to avoid code duplication. However, there are a few
2164 * small differences in the way these iterations work. Namely:
2165 * - itertext() only yields text from nodes that have it, and continues
2166 * iterating when a node doesn't have text (so it doesn't return any
2167 * node like iter())
2168 * - itertext() also has to handle tail, after finishing with all the
2169 * children of a node.
2170 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002171 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002172 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002173 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002174
2175 while (1) {
2176 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002177 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002178 * iterator is exhausted.
2179 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002180 if (!it->parent_stack_used) {
2181 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002182 PyErr_SetNone(PyExc_StopIteration);
2183 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002184 }
2185
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002186 elem = it->root_element; /* steals a reference */
2187 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 }
2189 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002190 /* See if there are children left to traverse in the current parent. If
2191 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002192 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002193 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2194 Py_ssize_t child_index = item->child_index;
2195 ElementObjectExtra *extra;
2196 elem = item->parent;
2197 extra = elem->extra;
2198 if (!extra || child_index >= extra->length) {
2199 it->parent_stack_used--;
2200 /* Note that extra condition on it->parent_stack_used here;
2201 * this is because itertext() is supposed to only return *inner*
2202 * text, not text following the element it began iteration with.
2203 */
2204 if (it->gettext && it->parent_stack_used) {
2205 text = element_get_tail(elem);
2206 goto gettext;
2207 }
2208 Py_DECREF(elem);
2209 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002210 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002211
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07002212 if (!Element_Check(extra->children[child_index])) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002213 PyErr_Format(PyExc_AttributeError,
2214 "'%.100s' object has no attribute 'iter'",
2215 Py_TYPE(extra->children[child_index])->tp_name);
2216 return NULL;
2217 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002218 elem = (ElementObject *)extra->children[child_index];
2219 item->child_index++;
2220 Py_INCREF(elem);
2221 }
2222
2223 if (parent_stack_push_new(it, elem) < 0) {
2224 Py_DECREF(elem);
2225 PyErr_NoMemory();
2226 return NULL;
2227 }
2228 if (it->gettext) {
2229 text = element_get_text(elem);
2230 goto gettext;
2231 }
2232
2233 if (it->sought_tag == Py_None)
2234 return (PyObject *)elem;
2235
2236 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2237 if (rc > 0)
2238 return (PyObject *)elem;
2239
2240 Py_DECREF(elem);
2241 if (rc < 0)
2242 return NULL;
2243 continue;
2244
2245gettext:
2246 if (!text) {
2247 Py_DECREF(elem);
2248 return NULL;
2249 }
2250 if (text == Py_None) {
2251 Py_DECREF(elem);
2252 }
2253 else {
2254 Py_INCREF(text);
2255 Py_DECREF(elem);
2256 rc = PyObject_IsTrue(text);
2257 if (rc > 0)
2258 return text;
2259 Py_DECREF(text);
2260 if (rc < 0)
2261 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002262 }
2263 }
2264
2265 return NULL;
2266}
2267
2268
2269static PyTypeObject ElementIter_Type = {
2270 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002271 /* Using the module's name since the pure-Python implementation does not
2272 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002273 "_elementtree._element_iterator", /* tp_name */
2274 sizeof(ElementIterObject), /* tp_basicsize */
2275 0, /* tp_itemsize */
2276 /* methods */
2277 (destructor)elementiter_dealloc, /* tp_dealloc */
2278 0, /* tp_print */
2279 0, /* tp_getattr */
2280 0, /* tp_setattr */
2281 0, /* tp_reserved */
2282 0, /* tp_repr */
2283 0, /* tp_as_number */
2284 0, /* tp_as_sequence */
2285 0, /* tp_as_mapping */
2286 0, /* tp_hash */
2287 0, /* tp_call */
2288 0, /* tp_str */
2289 0, /* tp_getattro */
2290 0, /* tp_setattro */
2291 0, /* tp_as_buffer */
2292 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2293 0, /* tp_doc */
2294 (traverseproc)elementiter_traverse, /* tp_traverse */
2295 0, /* tp_clear */
2296 0, /* tp_richcompare */
2297 0, /* tp_weaklistoffset */
2298 PyObject_SelfIter, /* tp_iter */
2299 (iternextfunc)elementiter_next, /* tp_iternext */
2300 0, /* tp_methods */
2301 0, /* tp_members */
2302 0, /* tp_getset */
2303 0, /* tp_base */
2304 0, /* tp_dict */
2305 0, /* tp_descr_get */
2306 0, /* tp_descr_set */
2307 0, /* tp_dictoffset */
2308 0, /* tp_init */
2309 0, /* tp_alloc */
2310 0, /* tp_new */
2311};
2312
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002313#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002314
2315static PyObject *
2316create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2317{
2318 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002319
2320 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2321 if (!it)
2322 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002323
Victor Stinner4d463432013-07-11 23:05:03 +02002324 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002325 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002326 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002327 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002328 it->root_element = self;
2329
Eli Bendersky64d11e62012-06-15 07:42:50 +03002330 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002331
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002332 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002333 if (it->parent_stack == NULL) {
2334 Py_DECREF(it);
2335 PyErr_NoMemory();
2336 return NULL;
2337 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002338 it->parent_stack_used = 0;
2339 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002340
Eli Bendersky64d11e62012-06-15 07:42:50 +03002341 return (PyObject *)it;
2342}
2343
2344
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345/* ==================================================================== */
2346/* the tree builder type */
2347
2348typedef struct {
2349 PyObject_HEAD
2350
Eli Bendersky58d548d2012-05-29 15:45:16 +03002351 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352
Antoine Pitrouee329312012-10-04 19:53:29 +02002353 PyObject *this; /* current node */
2354 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355
Eli Bendersky58d548d2012-05-29 15:45:16 +03002356 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002357
Eli Bendersky58d548d2012-05-29 15:45:16 +03002358 PyObject *stack; /* element stack */
2359 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002360
Eli Bendersky48d358b2012-05-30 17:57:50 +03002361 PyObject *element_factory;
2362
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002364 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002365 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2366 PyObject *end_event_obj;
2367 PyObject *start_ns_event_obj;
2368 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369} TreeBuilderObject;
2370
Christian Heimes90aa7642007-12-19 02:45:37 +00002371#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002372
2373/* -------------------------------------------------------------------- */
2374/* constructor and destructor */
2375
Eli Bendersky58d548d2012-05-29 15:45:16 +03002376static PyObject *
2377treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002378{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002379 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2380 if (t != NULL) {
2381 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002382
Eli Bendersky58d548d2012-05-29 15:45:16 +03002383 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002384 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002385 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002386 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387
Eli Bendersky58d548d2012-05-29 15:45:16 +03002388 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002389 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002390 t->stack = PyList_New(20);
2391 if (!t->stack) {
2392 Py_DECREF(t->this);
2393 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002394 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002395 return NULL;
2396 }
2397 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002398
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002399 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002400 t->start_event_obj = t->end_event_obj = NULL;
2401 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2402 }
2403 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002404}
2405
Serhiy Storchakacb985562015-05-04 15:32:48 +03002406/*[clinic input]
2407_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002408
Serhiy Storchakacb985562015-05-04 15:32:48 +03002409 element_factory: object = NULL
2410
2411[clinic start generated code]*/
2412
2413static int
2414_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2415 PyObject *element_factory)
2416/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2417{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002418 if (element_factory) {
2419 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002420 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002421 }
2422
Eli Bendersky58d548d2012-05-29 15:45:16 +03002423 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424}
2425
Eli Bendersky48d358b2012-05-30 17:57:50 +03002426static int
2427treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2428{
2429 Py_VISIT(self->root);
2430 Py_VISIT(self->this);
2431 Py_VISIT(self->last);
2432 Py_VISIT(self->data);
2433 Py_VISIT(self->stack);
2434 Py_VISIT(self->element_factory);
2435 return 0;
2436}
2437
2438static int
2439treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002440{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002441 Py_CLEAR(self->end_ns_event_obj);
2442 Py_CLEAR(self->start_ns_event_obj);
2443 Py_CLEAR(self->end_event_obj);
2444 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002445 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002446 Py_CLEAR(self->stack);
2447 Py_CLEAR(self->data);
2448 Py_CLEAR(self->last);
2449 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002450 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002451 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002452 return 0;
2453}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002454
Eli Bendersky48d358b2012-05-30 17:57:50 +03002455static void
2456treebuilder_dealloc(TreeBuilderObject *self)
2457{
2458 PyObject_GC_UnTrack(self);
2459 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002460 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461}
2462
2463/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002464/* helpers for handling of arbitrary element-like objects */
2465
2466static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002467treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002468 PyObject **dest, _Py_Identifier *name)
2469{
2470 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002471 PyObject *tmp = JOIN_OBJ(*dest);
2472 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2473 *data = NULL;
2474 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002475 return 0;
2476 }
2477 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002478 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002479 int r;
2480 if (joined == NULL)
2481 return -1;
2482 r = _PyObject_SetAttrId(element, name, joined);
2483 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002484 if (r < 0)
2485 return -1;
2486 Py_CLEAR(*data);
2487 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002488 }
2489}
2490
Serhiy Storchaka576def02017-03-30 09:47:31 +03002491LOCAL(int)
2492treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002493{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002494 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002495
Serhiy Storchaka576def02017-03-30 09:47:31 +03002496 if (!self->data) {
2497 return 0;
2498 }
2499
2500 if (self->this == element) {
2501 _Py_IDENTIFIER(text);
2502 return treebuilder_set_element_text_or_tail(
2503 element, &self->data,
2504 &((ElementObject *) element)->text, &PyId_text);
2505 }
2506 else {
2507 _Py_IDENTIFIER(tail);
2508 return treebuilder_set_element_text_or_tail(
2509 element, &self->data,
2510 &((ElementObject *) element)->tail, &PyId_tail);
2511 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002512}
2513
2514static int
2515treebuilder_add_subelement(PyObject *element, PyObject *child)
2516{
2517 _Py_IDENTIFIER(append);
2518 if (Element_CheckExact(element)) {
2519 ElementObject *elem = (ElementObject *) element;
2520 return element_add_subelement(elem, child);
2521 }
2522 else {
2523 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002524 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002525 if (res == NULL)
2526 return -1;
2527 Py_DECREF(res);
2528 return 0;
2529 }
2530}
2531
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002532LOCAL(int)
2533treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2534 PyObject *node)
2535{
2536 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002537 PyObject *res;
2538 PyObject *event = PyTuple_Pack(2, action, node);
2539 if (event == NULL)
2540 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002541 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002542 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002543 if (res == NULL)
2544 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002545 Py_DECREF(res);
2546 }
2547 return 0;
2548}
2549
Antoine Pitrouee329312012-10-04 19:53:29 +02002550/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551/* handlers */
2552
2553LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2555 PyObject* attrib)
2556{
2557 PyObject* node;
2558 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002559 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560
Serhiy Storchaka576def02017-03-30 09:47:31 +03002561 if (treebuilder_flush_data(self) < 0) {
2562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 }
2564
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002565 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002566 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002567 } else if (attrib == Py_None) {
2568 attrib = PyDict_New();
2569 if (!attrib)
2570 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002571 node = PyObject_CallFunctionObjArgs(self->element_factory,
2572 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002573 Py_DECREF(attrib);
2574 }
2575 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002576 node = PyObject_CallFunctionObjArgs(self->element_factory,
2577 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002578 }
2579 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002581 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582
Antoine Pitrouee329312012-10-04 19:53:29 +02002583 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002584
2585 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002586 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002587 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588 } else {
2589 if (self->root) {
2590 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002591 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002592 "multiple elements on top level"
2593 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002594 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002595 }
2596 Py_INCREF(node);
2597 self->root = node;
2598 }
2599
2600 if (self->index < PyList_GET_SIZE(self->stack)) {
2601 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002602 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603 Py_INCREF(this);
2604 } else {
2605 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002606 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002607 }
2608 self->index++;
2609
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002611 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002613 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002614
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002615 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2616 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617
2618 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002619
2620 error:
2621 Py_DECREF(node);
2622 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623}
2624
2625LOCAL(PyObject*)
2626treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2627{
2628 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002629 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002630 /* ignore calls to data before the first call to start */
2631 Py_RETURN_NONE;
2632 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633 /* store the first item as is */
2634 Py_INCREF(data); self->data = data;
2635 } else {
2636 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002637 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2638 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002639 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640 /* expat often generates single character data sections; handle
2641 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002642 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2643 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002645 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646 } else if (PyList_CheckExact(self->data)) {
2647 if (PyList_Append(self->data, data) < 0)
2648 return NULL;
2649 } else {
2650 PyObject* list = PyList_New(2);
2651 if (!list)
2652 return NULL;
2653 PyList_SET_ITEM(list, 0, self->data);
2654 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2655 self->data = list;
2656 }
2657 }
2658
2659 Py_RETURN_NONE;
2660}
2661
2662LOCAL(PyObject*)
2663treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2664{
2665 PyObject* item;
2666
Serhiy Storchaka576def02017-03-30 09:47:31 +03002667 if (treebuilder_flush_data(self) < 0) {
2668 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669 }
2670
2671 if (self->index == 0) {
2672 PyErr_SetString(
2673 PyExc_IndexError,
2674 "pop from empty stack"
2675 );
2676 return NULL;
2677 }
2678
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002679 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002680 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002681 self->index--;
2682 self->this = PyList_GET_ITEM(self->stack, self->index);
2683 Py_INCREF(self->this);
2684 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002686 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2687 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688
2689 Py_INCREF(self->last);
2690 return (PyObject*) self->last;
2691}
2692
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693/* -------------------------------------------------------------------- */
2694/* methods (in alphabetical order) */
2695
Serhiy Storchakacb985562015-05-04 15:32:48 +03002696/*[clinic input]
2697_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698
Serhiy Storchakacb985562015-05-04 15:32:48 +03002699 data: object
2700 /
2701
2702[clinic start generated code]*/
2703
2704static PyObject *
2705_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2706/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2707{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708 return treebuilder_handle_data(self, data);
2709}
2710
Serhiy Storchakacb985562015-05-04 15:32:48 +03002711/*[clinic input]
2712_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713
Serhiy Storchakacb985562015-05-04 15:32:48 +03002714 tag: object
2715 /
2716
2717[clinic start generated code]*/
2718
2719static PyObject *
2720_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2721/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2722{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723 return treebuilder_handle_end(self, tag);
2724}
2725
2726LOCAL(PyObject*)
2727treebuilder_done(TreeBuilderObject* self)
2728{
2729 PyObject* res;
2730
2731 /* FIXME: check stack size? */
2732
2733 if (self->root)
2734 res = self->root;
2735 else
2736 res = Py_None;
2737
2738 Py_INCREF(res);
2739 return res;
2740}
2741
Serhiy Storchakacb985562015-05-04 15:32:48 +03002742/*[clinic input]
2743_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744
Serhiy Storchakacb985562015-05-04 15:32:48 +03002745[clinic start generated code]*/
2746
2747static PyObject *
2748_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2749/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2750{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751 return treebuilder_done(self);
2752}
2753
Serhiy Storchakacb985562015-05-04 15:32:48 +03002754/*[clinic input]
2755_elementtree.TreeBuilder.start
2756
2757 tag: object
2758 attrs: object = None
2759 /
2760
2761[clinic start generated code]*/
2762
2763static PyObject *
2764_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2765 PyObject *attrs)
2766/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002768 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769}
2770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771/* ==================================================================== */
2772/* the expat interface */
2773
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002776
2777/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2778 * cached globally without being in per-module state.
2779 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002780static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782
Eli Bendersky52467b12012-06-01 07:13:08 +03002783static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2784 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2785
/* State of one XMLParser instance.  All PyObject members start out NULL
   (see xmlparser_new). */
typedef struct {
    PyObject_HEAD

    /* underlying expat parser handle */
    XML_Parser parser;

    /* object receiving parse events; the expat callbacks take a direct C
       shortcut when it is exactly a TreeBuilder */
    PyObject *target;
    /* mapping consulted by expat_default_handler to resolve entity
       references by name */
    PyObject *entity;

    /* cache used by makeuniversal: raw UTF-8 name (bytes) -> decoded
       universal name (str) */
    PyObject *names;

    /* callables invoked for start/data/end events when the target is not
       an exact TreeBuilder; a NULL member means the event is dropped */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    /* optional callables for comments, processing instructions and the
       doctype declaration; NULL when not provided */
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* NOTE(review): presumably the target's close() callable, used when
       parsing finishes -- its use is not visible in this part of the file */
    PyObject *handle_close;

} XMLParserObject;
2807
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002808static PyObject*
Serhiy Storchakaa5552f02017-12-15 13:11:11 +02002809_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002810static PyObject *
2811_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2812 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814/* helpers */
2815
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002816LOCAL(PyObject*)
2817makeuniversal(XMLParserObject* self, const char* string)
2818{
2819 /* convert a UTF-8 tag/attribute name from the expat parser
2820 to a universal name string */
2821
Antoine Pitrouc1948842012-10-01 23:40:37 +02002822 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823 PyObject* key;
2824 PyObject* value;
2825
2826 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002827 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002828 if (!key)
2829 return NULL;
2830
2831 value = PyDict_GetItem(self->names, key);
2832
2833 if (value) {
2834 Py_INCREF(value);
2835 } else {
2836 /* new name. convert to universal name, and decode as
2837 necessary */
2838
2839 PyObject* tag;
2840 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002841 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842
2843 /* look for namespace separator */
2844 for (i = 0; i < size; i++)
2845 if (string[i] == '}')
2846 break;
2847 if (i != size) {
2848 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002849 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002850 if (tag == NULL) {
2851 Py_DECREF(key);
2852 return NULL;
2853 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002854 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002855 p[0] = '{';
2856 memcpy(p+1, string, size);
2857 size++;
2858 } else {
2859 /* plain name; use key as tag */
2860 Py_INCREF(key);
2861 tag = key;
2862 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002863
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002864 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002865 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002866 value = PyUnicode_DecodeUTF8(p, size, "strict");
2867 Py_DECREF(tag);
2868 if (!value) {
2869 Py_DECREF(key);
2870 return NULL;
2871 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002872
2873 /* add to names dictionary */
2874 if (PyDict_SetItem(self->names, key, value) < 0) {
2875 Py_DECREF(key);
2876 Py_DECREF(value);
2877 return NULL;
2878 }
2879 }
2880
2881 Py_DECREF(key);
2882 return value;
2883}
2884
Eli Bendersky5b77d812012-03-16 08:20:05 +02002885/* Set the ParseError exception with the given parameters.
2886 * If message is not NULL, it's used as the error string. Otherwise, the
2887 * message string is the default for the given error_code.
2888*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002889static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002890expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2891 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002892{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002893 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002894 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002895
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002896 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002897 message ? message : EXPAT(ErrorString)(error_code),
2898 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002899 if (errmsg == NULL)
2900 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002901
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002902 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002903 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002904 if (!error)
2905 return;
2906
Eli Bendersky5b77d812012-03-16 08:20:05 +02002907 /* Add code and position attributes */
2908 code = PyLong_FromLong((long)error_code);
2909 if (!code) {
2910 Py_DECREF(error);
2911 return;
2912 }
2913 if (PyObject_SetAttrString(error, "code", code) == -1) {
2914 Py_DECREF(error);
2915 Py_DECREF(code);
2916 return;
2917 }
2918 Py_DECREF(code);
2919
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002920 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002921 if (!position) {
2922 Py_DECREF(error);
2923 return;
2924 }
2925 if (PyObject_SetAttrString(error, "position", position) == -1) {
2926 Py_DECREF(error);
2927 Py_DECREF(position);
2928 return;
2929 }
2930 Py_DECREF(position);
2931
Eli Bendersky532d03e2013-08-10 08:00:39 -07002932 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002933 Py_DECREF(error);
2934}
2935
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936/* -------------------------------------------------------------------- */
2937/* handlers */
2938
2939static void
2940expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2941 int data_len)
2942{
2943 PyObject* key;
2944 PyObject* value;
2945 PyObject* res;
2946
2947 if (data_len < 2 || data_in[0] != '&')
2948 return;
2949
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002950 if (PyErr_Occurred())
2951 return;
2952
Neal Norwitz0269b912007-08-08 06:56:02 +00002953 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954 if (!key)
2955 return;
2956
2957 value = PyDict_GetItem(self->entity, key);
2958
2959 if (value) {
2960 if (TreeBuilder_CheckExact(self->target))
2961 res = treebuilder_handle_data(
2962 (TreeBuilderObject*) self->target, value
2963 );
2964 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002965 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 else
2967 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002969 } else if (!PyErr_Occurred()) {
2970 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002971 char message[128] = "undefined entity ";
2972 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002973 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002974 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002976 EXPAT(GetErrorColumnNumber)(self->parser),
2977 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 );
2979 }
2980
2981 Py_DECREF(key);
2982}
2983
2984static void
2985expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2986 const XML_Char **attrib_in)
2987{
2988 PyObject* res;
2989 PyObject* tag;
2990 PyObject* attrib;
2991 int ok;
2992
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002993 if (PyErr_Occurred())
2994 return;
2995
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002996 /* tag name */
2997 tag = makeuniversal(self, tag_in);
2998 if (!tag)
2999 return; /* parser will look for errors */
3000
3001 /* attributes */
3002 if (attrib_in[0]) {
3003 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003004 if (!attrib) {
3005 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003007 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 while (attrib_in[0] && attrib_in[1]) {
3009 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00003010 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003011 if (!key || !value) {
3012 Py_XDECREF(value);
3013 Py_XDECREF(key);
3014 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003015 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 return;
3017 }
3018 ok = PyDict_SetItem(attrib, key, value);
3019 Py_DECREF(value);
3020 Py_DECREF(key);
3021 if (ok < 0) {
3022 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02003023 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003024 return;
3025 }
3026 attrib_in += 2;
3027 }
3028 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003029 Py_INCREF(Py_None);
3030 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003031 }
3032
3033 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003034 /* shortcut */
3035 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3036 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003037 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003038 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003039 if (attrib == Py_None) {
3040 Py_DECREF(attrib);
3041 attrib = PyDict_New();
3042 if (!attrib) {
3043 Py_DECREF(tag);
3044 return;
3045 }
3046 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003047 res = PyObject_CallFunctionObjArgs(self->handle_start,
3048 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003049 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003050 res = NULL;
3051
3052 Py_DECREF(tag);
3053 Py_DECREF(attrib);
3054
3055 Py_XDECREF(res);
3056}
3057
3058static void
3059expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3060 int data_len)
3061{
3062 PyObject* data;
3063 PyObject* res;
3064
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003065 if (PyErr_Occurred())
3066 return;
3067
Neal Norwitz0269b912007-08-08 06:56:02 +00003068 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003069 if (!data)
3070 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071
3072 if (TreeBuilder_CheckExact(self->target))
3073 /* shortcut */
3074 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3075 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003076 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003077 else
3078 res = NULL;
3079
3080 Py_DECREF(data);
3081
3082 Py_XDECREF(res);
3083}
3084
3085static void
3086expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3087{
3088 PyObject* tag;
3089 PyObject* res = NULL;
3090
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003091 if (PyErr_Occurred())
3092 return;
3093
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094 if (TreeBuilder_CheckExact(self->target))
3095 /* shortcut */
3096 /* the standard tree builder doesn't look at the end tag */
3097 res = treebuilder_handle_end(
3098 (TreeBuilderObject*) self->target, Py_None
3099 );
3100 else if (self->handle_end) {
3101 tag = makeuniversal(self, tag_in);
3102 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003103 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 Py_DECREF(tag);
3105 }
3106 }
3107
3108 Py_XDECREF(res);
3109}
3110
3111static void
3112expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3113 const XML_Char *uri)
3114{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003115 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3116 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003117
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003118 if (PyErr_Occurred())
3119 return;
3120
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003121 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003122 return;
3123
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003124 if (!uri)
3125 uri = "";
3126 if (!prefix)
3127 prefix = "";
3128
3129 parcel = Py_BuildValue("ss", prefix, uri);
3130 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003131 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003132 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3133 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134}
3135
3136static void
3137expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3138{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003139 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3140
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003141 if (PyErr_Occurred())
3142 return;
3143
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003144 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003145 return;
3146
3147 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003148}
3149
3150static void
3151expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3152{
3153 PyObject* comment;
3154 PyObject* res;
3155
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003156 if (PyErr_Occurred())
3157 return;
3158
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003159 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003160 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003161 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003162 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3163 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003164 Py_XDECREF(res);
3165 Py_DECREF(comment);
3166 }
3167 }
3168}
3169
Eli Bendersky45839902013-01-13 05:14:47 -08003170static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003171expat_start_doctype_handler(XMLParserObject *self,
3172 const XML_Char *doctype_name,
3173 const XML_Char *sysid,
3174 const XML_Char *pubid,
3175 int has_internal_subset)
3176{
3177 PyObject *self_pyobj = (PyObject *)self;
3178 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3179 PyObject *parser_doctype = NULL;
3180 PyObject *res = NULL;
3181
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003182 if (PyErr_Occurred())
3183 return;
3184
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003185 doctype_name_obj = makeuniversal(self, doctype_name);
3186 if (!doctype_name_obj)
3187 return;
3188
3189 if (sysid) {
3190 sysid_obj = makeuniversal(self, sysid);
3191 if (!sysid_obj) {
3192 Py_DECREF(doctype_name_obj);
3193 return;
3194 }
3195 } else {
3196 Py_INCREF(Py_None);
3197 sysid_obj = Py_None;
3198 }
3199
3200 if (pubid) {
3201 pubid_obj = makeuniversal(self, pubid);
3202 if (!pubid_obj) {
3203 Py_DECREF(doctype_name_obj);
3204 Py_DECREF(sysid_obj);
3205 return;
3206 }
3207 } else {
3208 Py_INCREF(Py_None);
3209 pubid_obj = Py_None;
3210 }
3211
3212 /* If the target has a handler for doctype, call it. */
3213 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003214 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3215 doctype_name_obj, pubid_obj,
3216 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003217 Py_CLEAR(res);
3218 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003219 else {
3220 /* Now see if the parser itself has a doctype method. If yes and it's
3221 * a custom method, call it but warn about deprecation. If it's only
3222 * the vanilla XMLParser method, do nothing.
3223 */
3224 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3225 if (parser_doctype &&
3226 !(PyCFunction_Check(parser_doctype) &&
3227 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3228 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003229 (PyCFunction) _elementtree_XMLParser_doctype)) {
3230 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3231 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003232 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003233 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003234 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003235 res = PyObject_CallFunctionObjArgs(parser_doctype,
3236 doctype_name_obj, pubid_obj,
3237 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003238 Py_CLEAR(res);
3239 }
3240 }
3241
3242clear:
3243 Py_XDECREF(parser_doctype);
3244 Py_DECREF(doctype_name_obj);
3245 Py_DECREF(pubid_obj);
3246 Py_DECREF(sysid_obj);
3247}
3248
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249static void
3250expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3251 const XML_Char* data_in)
3252{
3253 PyObject* target;
3254 PyObject* data;
3255 PyObject* res;
3256
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003257 if (PyErr_Occurred())
3258 return;
3259
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003261 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3262 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003264 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3265 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266 Py_XDECREF(res);
3267 Py_DECREF(data);
3268 Py_DECREF(target);
3269 } else {
3270 Py_XDECREF(data);
3271 Py_XDECREF(target);
3272 }
3273 }
3274}
3275
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277
Eli Bendersky52467b12012-06-01 07:13:08 +03003278static PyObject *
3279xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280{
Eli Bendersky52467b12012-06-01 07:13:08 +03003281 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3282 if (self) {
3283 self->parser = NULL;
3284 self->target = self->entity = self->names = NULL;
3285 self->handle_start = self->handle_data = self->handle_end = NULL;
3286 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003287 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 return (PyObject *)self;
3290}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291
scoderc8d8e152017-09-14 22:00:03 +02003292static int
3293ignore_attribute_error(PyObject *value)
3294{
3295 if (value == NULL) {
3296 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3297 return -1;
3298 }
3299 PyErr_Clear();
3300 }
3301 return 0;
3302}
3303
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304/*[clinic input]
3305_elementtree.XMLParser.__init__
3306
3307 html: object = NULL
3308 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003309 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003310
3311[clinic start generated code]*/
3312
Eli Bendersky52467b12012-06-01 07:13:08 +03003313static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003314_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3315 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003316/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003317{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003318 if (html != NULL) {
3319 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3320 "The html argument of XMLParser() is deprecated",
3321 1) < 0) {
3322 return -1;
3323 }
3324 }
3325
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 self->entity = PyDict_New();
3327 if (!self->entity)
3328 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329
Serhiy Storchakacb985562015-05-04 15:32:48 +03003330 self->names = PyDict_New();
3331 if (!self->names) {
3332 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003333 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003335
Serhiy Storchakacb985562015-05-04 15:32:48 +03003336 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3337 if (!self->parser) {
3338 Py_CLEAR(self->entity);
3339 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003341 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342 }
Miss Islington (bot)470a4352018-09-18 06:11:09 -07003343 /* expat < 2.1.0 has no XML_SetHashSalt() */
3344 if (EXPAT(SetHashSalt) != NULL) {
3345 EXPAT(SetHashSalt)(self->parser,
3346 (unsigned long)_Py_HashSecret.expat.hashsalt);
3347 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348
Eli Bendersky52467b12012-06-01 07:13:08 +03003349 if (target) {
3350 Py_INCREF(target);
3351 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003352 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003354 Py_CLEAR(self->entity);
3355 Py_CLEAR(self->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003356 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003358 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003359 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360
Serhiy Storchakacb985562015-05-04 15:32:48 +03003361 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003362 if (ignore_attribute_error(self->handle_start)) {
3363 return -1;
3364 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003365 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003366 if (ignore_attribute_error(self->handle_data)) {
3367 return -1;
3368 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003370 if (ignore_attribute_error(self->handle_end)) {
3371 return -1;
3372 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003373 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003374 if (ignore_attribute_error(self->handle_comment)) {
3375 return -1;
3376 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003378 if (ignore_attribute_error(self->handle_pi)) {
3379 return -1;
3380 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003381 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003382 if (ignore_attribute_error(self->handle_close)) {
3383 return -1;
3384 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003385 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003386 if (ignore_attribute_error(self->handle_doctype)) {
3387 return -1;
3388 }
Eli Bendersky45839902013-01-13 05:14:47 -08003389
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003391 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003393 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 (XML_StartElementHandler) expat_start_handler,
3395 (XML_EndElementHandler) expat_end_handler
3396 );
3397 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003398 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 (XML_DefaultHandler) expat_default_handler
3400 );
3401 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003402 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403 (XML_CharacterDataHandler) expat_data_handler
3404 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003405 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003406 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003407 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003408 (XML_CommentHandler) expat_comment_handler
3409 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003410 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003412 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 (XML_ProcessingInstructionHandler) expat_pi_handler
3414 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003415 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003416 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003417 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3418 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003420 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003421 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003423
Eli Bendersky52467b12012-06-01 07:13:08 +03003424 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003425}
3426
Eli Bendersky52467b12012-06-01 07:13:08 +03003427static int
3428xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3429{
3430 Py_VISIT(self->handle_close);
3431 Py_VISIT(self->handle_pi);
3432 Py_VISIT(self->handle_comment);
3433 Py_VISIT(self->handle_end);
3434 Py_VISIT(self->handle_data);
3435 Py_VISIT(self->handle_start);
3436
3437 Py_VISIT(self->target);
3438 Py_VISIT(self->entity);
3439 Py_VISIT(self->names);
3440
3441 return 0;
3442}
3443
3444static int
3445xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446{
Victor Stinnere727d412017-09-18 05:29:37 -07003447 if (self->parser != NULL) {
3448 XML_Parser parser = self->parser;
3449 self->parser = NULL;
3450 EXPAT(ParserFree)(parser);
3451 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452
Antoine Pitrouc1948842012-10-01 23:40:37 +02003453 Py_CLEAR(self->handle_close);
3454 Py_CLEAR(self->handle_pi);
3455 Py_CLEAR(self->handle_comment);
3456 Py_CLEAR(self->handle_end);
3457 Py_CLEAR(self->handle_data);
3458 Py_CLEAR(self->handle_start);
3459 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460
Antoine Pitrouc1948842012-10-01 23:40:37 +02003461 Py_CLEAR(self->target);
3462 Py_CLEAR(self->entity);
3463 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464
Eli Bendersky52467b12012-06-01 07:13:08 +03003465 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003466}
3467
Eli Bendersky52467b12012-06-01 07:13:08 +03003468static void
3469xmlparser_dealloc(XMLParserObject* self)
3470{
3471 PyObject_GC_UnTrack(self);
3472 xmlparser_gc_clear(self);
3473 Py_TYPE(self)->tp_free((PyObject *)self);
3474}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475
3476LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003477expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478{
3479 int ok;
3480
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003481 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3483
3484 if (PyErr_Occurred())
3485 return NULL;
3486
3487 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003488 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003489 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003491 EXPAT(GetErrorColumnNumber)(self->parser),
3492 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003493 );
3494 return NULL;
3495 }
3496
3497 Py_RETURN_NONE;
3498}
3499
Serhiy Storchakacb985562015-05-04 15:32:48 +03003500/*[clinic input]
3501_elementtree.XMLParser.close
3502
3503[clinic start generated code]*/
3504
3505static PyObject *
3506_elementtree_XMLParser_close_impl(XMLParserObject *self)
3507/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508{
3509 /* end feeding data to parser */
3510
3511 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003512 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003513 if (!res)
3514 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003516 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517 Py_DECREF(res);
3518 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003519 }
3520 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003521 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003522 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003523 }
3524 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003525 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003526 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527}
3528
Serhiy Storchakacb985562015-05-04 15:32:48 +03003529/*[clinic input]
3530_elementtree.XMLParser.feed
3531
3532 data: object
3533 /
3534
3535[clinic start generated code]*/
3536
3537static PyObject *
3538_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3539/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540{
3541 /* feed data to parser */
3542
Serhiy Storchakacb985562015-05-04 15:32:48 +03003543 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003544 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3546 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003547 return NULL;
3548 if (data_len > INT_MAX) {
3549 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3550 return NULL;
3551 }
3552 /* Explicitly set UTF-8 encoding. Return code ignored. */
3553 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003554 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003555 }
3556 else {
3557 Py_buffer view;
3558 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003559 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003560 return NULL;
3561 if (view.len > INT_MAX) {
3562 PyBuffer_Release(&view);
3563 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3564 return NULL;
3565 }
3566 res = expat_parse(self, view.buf, (int)view.len, 0);
3567 PyBuffer_Release(&view);
3568 return res;
3569 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003570}
3571
Serhiy Storchakacb985562015-05-04 15:32:48 +03003572/*[clinic input]
3573_elementtree.XMLParser._parse_whole
3574
3575 file: object
3576 /
3577
3578[clinic start generated code]*/
3579
3580static PyObject *
3581_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3582/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583{
Eli Benderskya3699232013-05-19 18:47:23 -07003584 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 PyObject* reader;
3586 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003587 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 PyObject* res;
3589
Serhiy Storchakacb985562015-05-04 15:32:48 +03003590 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591 if (!reader)
3592 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003593
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594 /* read from open file object */
3595 for (;;) {
3596
3597 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3598
3599 if (!buffer) {
3600 /* read failed (e.g. due to KeyboardInterrupt) */
3601 Py_DECREF(reader);
3602 return NULL;
3603 }
3604
Eli Benderskyf996e772012-03-16 05:53:30 +02003605 if (PyUnicode_CheckExact(buffer)) {
3606 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003607 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003608 Py_DECREF(buffer);
3609 break;
3610 }
3611 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003612 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003613 if (!temp) {
3614 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003615 Py_DECREF(reader);
3616 return NULL;
3617 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003618 buffer = temp;
3619 }
3620 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621 Py_DECREF(buffer);
3622 break;
3623 }
3624
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003625 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3626 Py_DECREF(buffer);
3627 Py_DECREF(reader);
3628 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3629 return NULL;
3630 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003632 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 );
3634
3635 Py_DECREF(buffer);
3636
3637 if (!res) {
3638 Py_DECREF(reader);
3639 return NULL;
3640 }
3641 Py_DECREF(res);
3642
3643 }
3644
3645 Py_DECREF(reader);
3646
3647 res = expat_parse(self, "", 0, 1);
3648
3649 if (res && TreeBuilder_CheckExact(self->target)) {
3650 Py_DECREF(res);
3651 return treebuilder_done((TreeBuilderObject*) self->target);
3652 }
3653
3654 return res;
3655}
3656
Serhiy Storchakacb985562015-05-04 15:32:48 +03003657/*[clinic input]
3658_elementtree.XMLParser.doctype
3659
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003660 name: object
3661 pubid: object
3662 system: object
3663 /
3664
Serhiy Storchakacb985562015-05-04 15:32:48 +03003665[clinic start generated code]*/
3666
3667static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003668_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3669 PyObject *pubid, PyObject *system)
3670/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003671{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003672 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3673 "This method of XMLParser is deprecated. Define"
3674 " doctype() method on the TreeBuilder target.",
3675 1) < 0) {
3676 return NULL;
3677 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003678 Py_RETURN_NONE;
3679}
3680
Serhiy Storchakacb985562015-05-04 15:32:48 +03003681/*[clinic input]
3682_elementtree.XMLParser._setevents
3683
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003684 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003685 events_to_report: object = None
3686 /
3687
3688[clinic start generated code]*/
3689
3690static PyObject *
3691_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3692 PyObject *events_queue,
3693 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003694/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695{
3696 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003697 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003698 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003699 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700
3701 if (!TreeBuilder_CheckExact(self->target)) {
3702 PyErr_SetString(
3703 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003704 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003705 "targets"
3706 );
3707 return NULL;
3708 }
3709
3710 target = (TreeBuilderObject*) self->target;
3711
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003712 events_append = PyObject_GetAttrString(events_queue, "append");
3713 if (events_append == NULL)
3714 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003715 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003716
3717 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003718 Py_CLEAR(target->start_event_obj);
3719 Py_CLEAR(target->end_event_obj);
3720 Py_CLEAR(target->start_ns_event_obj);
3721 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003723 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003725 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003726 Py_RETURN_NONE;
3727 }
3728
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003729 if (!(events_seq = PySequence_Fast(events_to_report,
3730 "events must be a sequence"))) {
3731 return NULL;
3732 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003733
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003734 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003735 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003736 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003737 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003738 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003739 } else if (PyBytes_Check(event_name_obj)) {
3740 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003741 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003742 if (event_name == NULL) {
3743 Py_DECREF(events_seq);
3744 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3745 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003746 }
3747
3748 Py_INCREF(event_name_obj);
3749 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003750 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003751 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003752 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003753 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003754 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003755 EXPAT(SetNamespaceDeclHandler)(
3756 self->parser,
3757 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3758 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3759 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003760 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003761 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762 EXPAT(SetNamespaceDeclHandler)(
3763 self->parser,
3764 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3765 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3766 );
3767 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003768 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003769 Py_DECREF(events_seq);
3770 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003771 return NULL;
3772 }
3773 }
3774
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003775 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003776 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003777}
3778
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003779static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003780xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003781{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003782 if (PyUnicode_Check(nameobj)) {
3783 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003784 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003785 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003786 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003787 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003788 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003789 return PyUnicode_FromFormat(
3790 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003791 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003792 }
3793 else
3794 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003795
Alexander Belopolskye239d232010-12-08 23:31:48 +00003796 Py_INCREF(res);
3797 return res;
3798 }
3799 generic:
3800 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003801}
3802
Serhiy Storchakacb985562015-05-04 15:32:48 +03003803#include "clinic/_elementtree.c.h"
3804
3805static PyMethodDef element_methods[] = {
3806
3807 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3808
3809 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3810 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3811
3812 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3813 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3814 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3815
3816 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3817 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3818 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3819 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3820
3821 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3822 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3823 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3824
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003825 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003826 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3827
3828 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3829 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3830
3831 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3832
3833 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3834 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3835 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3836 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3837 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3838
3839 {NULL, NULL}
3840};
3841
3842static PyMappingMethods element_as_mapping = {
3843 (lenfunc) element_length,
3844 (binaryfunc) element_subscr,
3845 (objobjargproc) element_ass_subscr,
3846};
3847
Serhiy Storchakadde08152015-11-25 15:28:13 +02003848static PyGetSetDef element_getsetlist[] = {
3849 {"tag",
3850 (getter)element_tag_getter,
3851 (setter)element_tag_setter,
3852 "A string identifying what kind of data this element represents"},
3853 {"text",
3854 (getter)element_text_getter,
3855 (setter)element_text_setter,
3856 "A string of text directly after the start tag, or None"},
3857 {"tail",
3858 (getter)element_tail_getter,
3859 (setter)element_tail_setter,
3860 "A string of text directly after the end tag, or None"},
3861 {"attrib",
3862 (getter)element_attrib_getter,
3863 (setter)element_attrib_setter,
3864 "A dictionary containing the element's attributes"},
3865 {NULL},
3866};
3867
Serhiy Storchakacb985562015-05-04 15:32:48 +03003868static PyTypeObject Element_Type = {
3869 PyVarObject_HEAD_INIT(NULL, 0)
3870 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3871 /* methods */
3872 (destructor)element_dealloc, /* tp_dealloc */
3873 0, /* tp_print */
3874 0, /* tp_getattr */
3875 0, /* tp_setattr */
3876 0, /* tp_reserved */
3877 (reprfunc)element_repr, /* tp_repr */
3878 0, /* tp_as_number */
3879 &element_as_sequence, /* tp_as_sequence */
3880 &element_as_mapping, /* tp_as_mapping */
3881 0, /* tp_hash */
3882 0, /* tp_call */
3883 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003884 PyObject_GenericGetAttr, /* tp_getattro */
3885 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003886 0, /* tp_as_buffer */
3887 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3888 /* tp_flags */
3889 0, /* tp_doc */
3890 (traverseproc)element_gc_traverse, /* tp_traverse */
3891 (inquiry)element_gc_clear, /* tp_clear */
3892 0, /* tp_richcompare */
3893 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3894 0, /* tp_iter */
3895 0, /* tp_iternext */
3896 element_methods, /* tp_methods */
3897 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003898 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003899 0, /* tp_base */
3900 0, /* tp_dict */
3901 0, /* tp_descr_get */
3902 0, /* tp_descr_set */
3903 0, /* tp_dictoffset */
3904 (initproc)element_init, /* tp_init */
3905 PyType_GenericAlloc, /* tp_alloc */
3906 element_new, /* tp_new */
3907 0, /* tp_free */
3908};
3909
3910static PyMethodDef treebuilder_methods[] = {
3911 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3912 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3913 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3914 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3915 {NULL, NULL}
3916};
3917
3918static PyTypeObject TreeBuilder_Type = {
3919 PyVarObject_HEAD_INIT(NULL, 0)
3920 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3921 /* methods */
3922 (destructor)treebuilder_dealloc, /* tp_dealloc */
3923 0, /* tp_print */
3924 0, /* tp_getattr */
3925 0, /* tp_setattr */
3926 0, /* tp_reserved */
3927 0, /* tp_repr */
3928 0, /* tp_as_number */
3929 0, /* tp_as_sequence */
3930 0, /* tp_as_mapping */
3931 0, /* tp_hash */
3932 0, /* tp_call */
3933 0, /* tp_str */
3934 0, /* tp_getattro */
3935 0, /* tp_setattro */
3936 0, /* tp_as_buffer */
3937 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3938 /* tp_flags */
3939 0, /* tp_doc */
3940 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3941 (inquiry)treebuilder_gc_clear, /* tp_clear */
3942 0, /* tp_richcompare */
3943 0, /* tp_weaklistoffset */
3944 0, /* tp_iter */
3945 0, /* tp_iternext */
3946 treebuilder_methods, /* tp_methods */
3947 0, /* tp_members */
3948 0, /* tp_getset */
3949 0, /* tp_base */
3950 0, /* tp_dict */
3951 0, /* tp_descr_get */
3952 0, /* tp_descr_set */
3953 0, /* tp_dictoffset */
3954 _elementtree_TreeBuilder___init__, /* tp_init */
3955 PyType_GenericAlloc, /* tp_alloc */
3956 treebuilder_new, /* tp_new */
3957 0, /* tp_free */
3958};
3959
3960static PyMethodDef xmlparser_methods[] = {
3961 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3962 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3963 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3964 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3965 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3966 {NULL, NULL}
3967};
3968
Neal Norwitz227b5332006-03-22 09:28:35 +00003969static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003970 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003971 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003973 (destructor)xmlparser_dealloc, /* tp_dealloc */
3974 0, /* tp_print */
3975 0, /* tp_getattr */
3976 0, /* tp_setattr */
3977 0, /* tp_reserved */
3978 0, /* tp_repr */
3979 0, /* tp_as_number */
3980 0, /* tp_as_sequence */
3981 0, /* tp_as_mapping */
3982 0, /* tp_hash */
3983 0, /* tp_call */
3984 0, /* tp_str */
3985 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3986 0, /* tp_setattro */
3987 0, /* tp_as_buffer */
3988 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3989 /* tp_flags */
3990 0, /* tp_doc */
3991 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3992 (inquiry)xmlparser_gc_clear, /* tp_clear */
3993 0, /* tp_richcompare */
3994 0, /* tp_weaklistoffset */
3995 0, /* tp_iter */
3996 0, /* tp_iternext */
3997 xmlparser_methods, /* tp_methods */
3998 0, /* tp_members */
3999 0, /* tp_getset */
4000 0, /* tp_base */
4001 0, /* tp_dict */
4002 0, /* tp_descr_get */
4003 0, /* tp_descr_set */
4004 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03004005 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03004006 PyType_GenericAlloc, /* tp_alloc */
4007 xmlparser_new, /* tp_new */
4008 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004009};
4010
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004011/* ==================================================================== */
4012/* python module interface */
4013
4014static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08004015 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004016 {NULL, NULL}
4017};
4018
Martin v. Löwis1a214512008-06-11 05:26:20 +00004019
Eli Bendersky532d03e2013-08-10 08:00:39 -07004020static struct PyModuleDef elementtreemodule = {
4021 PyModuleDef_HEAD_INIT,
4022 "_elementtree",
4023 NULL,
4024 sizeof(elementtreestate),
4025 _functions,
4026 NULL,
4027 elementtree_traverse,
4028 elementtree_clear,
4029 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00004030};
4031
Neal Norwitzf6657e62006-12-28 04:47:50 +00004032PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004033PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004034{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004035 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004036 elementtreestate *st;
4037
4038 m = PyState_FindModule(&elementtreemodule);
4039 if (m) {
4040 Py_INCREF(m);
4041 return m;
4042 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004043
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004044 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004045 if (PyType_Ready(&ElementIter_Type) < 0)
4046 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004047 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004048 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004049 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004050 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004051 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004052 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004053
Eli Bendersky532d03e2013-08-10 08:00:39 -07004054 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004055 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004056 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004057 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004058
Eli Bendersky828efde2012-04-05 05:40:58 +03004059 if (!(temp = PyImport_ImportModule("copy")))
4060 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004061 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004062 Py_XDECREF(temp);
4063
Victor Stinnerb136f112017-07-10 22:28:02 +02004064 if (st->deepcopy_obj == NULL) {
4065 return NULL;
4066 }
4067
4068 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004069 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004070 return NULL;
4071
Eli Bendersky20d41742012-06-01 09:48:37 +03004072 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004073 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4074 if (expat_capi) {
4075 /* check that it's usable */
4076 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004077 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004078 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4079 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004080 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004081 PyErr_SetString(PyExc_ImportError,
4082 "pyexpat version is incompatible");
4083 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004084 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004085 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004086 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004087 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004088
Eli Bendersky532d03e2013-08-10 08:00:39 -07004089 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004090 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004091 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004092 Py_INCREF(st->parseerror_obj);
4093 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004094
Eli Bendersky092af1f2012-03-04 07:14:03 +02004095 Py_INCREF((PyObject *)&Element_Type);
4096 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4097
Eli Bendersky58d548d2012-05-29 15:45:16 +03004098 Py_INCREF((PyObject *)&TreeBuilder_Type);
4099 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4100
Eli Bendersky52467b12012-06-01 07:13:08 +03004101 Py_INCREF((PyObject *)&XMLParser_Type);
4102 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004103
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004104 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004105}