blob: 78a52e859df2a48a3410a1d7308514df3f15ca52 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
27 have no more than the given number of children. Set this to zero
28 to minimize the size of the element structure itself (this only
29 helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change the condition to 1 to keep a running
   byte count and print it on every ALLOC/RELEASE; in the default build both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private definition of the
   given type; MSVC builds additionally ask for inlining and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* Macros used to keep a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as an object pointer must go through
   JOIN_OBJ, which masks the flag off; see the comments in the ElementObject
   definition for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit cleared
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer with the flag bit replaced */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Oren Milman39ecb9c2017-10-10 23:26:24 +030064/* Py_SETREF for a PyObject* that uses a join flag. */
65Py_LOCAL_INLINE(void)
66_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
67{
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = new_joined_ptr;
70 Py_DECREF(tmp);
71}
72
Eli Benderskydd3661e2013-09-13 06:24:25 -070073/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
74 * reference since this function sets it to NULL.
75*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020076static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070077{
78 if (*p) {
Oren Milman39ecb9c2017-10-10 23:26:24 +030079 _set_joined_ptr(p, NULL);
Eli Benderskydd3661e2013-09-13 06:24:25 -070080 }
81}
82
Ronald Oussoren138d0802013-07-19 11:11:25 +020083/* Types defined by this extension */
84static PyTypeObject Element_Type;
85static PyTypeObject ElementIter_Type;
86static PyTypeObject TreeBuilder_Type;
87static PyTypeObject XMLParser_Type;
88
89
Eli Bendersky532d03e2013-08-10 08:00:39 -070090/* Per-module state; PEP 3121 */
91typedef struct {
92 PyObject *parseerror_obj;
93 PyObject *deepcopy_obj;
94 PyObject *elementpath_obj;
95} elementtreestate;
96
97static struct PyModuleDef elementtreemodule;
98
99/* Given a module object (assumed to be _elementtree), get its per-module
100 * state.
101 */
102#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
103
104/* Find the module instance imported in the currently running sub-interpreter
105 * and get its state.
106 */
107#define ET_STATE_GLOBAL \
108 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
109
110static int
111elementtree_clear(PyObject *m)
112{
113 elementtreestate *st = ET_STATE(m);
114 Py_CLEAR(st->parseerror_obj);
115 Py_CLEAR(st->deepcopy_obj);
116 Py_CLEAR(st->elementpath_obj);
117 return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123 elementtreestate *st = ET_STATE(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 return 0;
128}
129
130static void
131elementtree_free(void *m)
132{
133 elementtree_clear((PyObject *)m);
134}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135
136/* helpers */
137
138LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139list_join(PyObject* list)
140{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300141 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 PyObject* result;
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000146 if (!joiner)
147 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200148 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
Eli Bendersky48d358b2012-05-30 17:57:50 +0300153/* Is the given object an empty dictionary?
154*/
155static int
156is_empty_dict(PyObject *obj)
157{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200158 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300159}
160
161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000162/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200163/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164
165typedef struct {
166
167 /* attributes (a dictionary object), or None if no attributes */
168 PyObject* attrib;
169
170 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200171 Py_ssize_t length; /* actual number of items */
172 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000173
174 /* this either points to _children or to a malloced buffer */
175 PyObject* *children;
176
177 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179} ElementObjectExtra;
180
181typedef struct {
182 PyObject_HEAD
183
184 /* element tag (a string). */
185 PyObject* tag;
186
187 /* text before first child. note that this is a tagged pointer;
188 use JOIN_OBJ to get the object pointer. the join flag is used
189 to distinguish lists created by the tree builder from lists
190 assigned to the attribute by application code; the former
191 should be joined before being returned to the user, the latter
192 should be left intact. */
193 PyObject* text;
194
195 /* text after this element, in parent. note that this is a tagged
196 pointer; use JOIN_OBJ to get the object pointer. */
197 PyObject* tail;
198
199 ElementObjectExtra* extra;
200
Eli Benderskyebf37a22012-04-03 22:02:37 +0300201 PyObject *weakreflist; /* For tp_weaklistoffset */
202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203} ElementObject;
204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
/* Element_CheckExact: op's type is exactly Element_Type.
   Element_Check: op's type is Element_Type or a subtype of it. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
208
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000209
210/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200211/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200214create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000215{
216 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200217 if (!self->extra) {
218 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000219 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200220 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000221
222 if (!attrib)
223 attrib = Py_None;
224
225 Py_INCREF(attrib);
226 self->extra->attrib = attrib;
227
228 self->extra->length = 0;
229 self->extra->allocated = STATIC_CHILDREN;
230 self->extra->children = self->extra->_children;
231
232 return 0;
233}
234
235LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200236dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000237{
Eli Bendersky08b85292012-04-04 15:55:07 +0300238 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200239 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300240
Eli Benderskyebf37a22012-04-03 22:02:37 +0300241 if (!self->extra)
242 return;
243
244 /* Avoid DECREFs calling into this code again (cycles, etc.)
245 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300246 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 self->extra = NULL;
248
249 Py_DECREF(myextra->attrib);
250
Eli Benderskyebf37a22012-04-03 22:02:37 +0300251 for (i = 0; i < myextra->length; i++)
252 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000253
Eli Benderskyebf37a22012-04-03 22:02:37 +0300254 if (myextra->children != myextra->_children)
255 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256
Eli Benderskyebf37a22012-04-03 22:02:37 +0300257 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258}
259
Eli Bendersky092af1f2012-03-04 07:14:03 +0200260/* Convenience internal function to create new Element objects with the given
261 * tag and attributes.
262*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000263LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200264create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000265{
266 ElementObject* self;
267
Eli Bendersky0192ba32012-03-30 16:38:33 +0300268 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000269 if (self == NULL)
270 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271 self->extra = NULL;
272
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273 Py_INCREF(tag);
274 self->tag = tag;
275
276 Py_INCREF(Py_None);
277 self->text = Py_None;
278
279 Py_INCREF(Py_None);
280 self->tail = Py_None;
281
Eli Benderskyebf37a22012-04-03 22:02:37 +0300282 self->weakreflist = NULL;
283
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200284 ALLOC(sizeof(ElementObject), "create element");
285 PyObject_GC_Track(self);
286
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200287 if (attrib != Py_None && !is_empty_dict(attrib)) {
288 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200289 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200290 return NULL;
291 }
292 }
293
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000294 return (PyObject*) self;
295}
296
Eli Bendersky092af1f2012-03-04 07:14:03 +0200297static PyObject *
298element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
299{
300 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
301 if (e != NULL) {
302 Py_INCREF(Py_None);
303 e->tag = Py_None;
304
305 Py_INCREF(Py_None);
306 e->text = Py_None;
307
308 Py_INCREF(Py_None);
309 e->tail = Py_None;
310
311 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300312 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200313 }
314 return (PyObject *)e;
315}
316
Eli Bendersky737b1732012-05-29 06:02:56 +0300317/* Helper function for extracting the attrib dictionary from a keywords dict.
318 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800319 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300320 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700321 *
322 * Return a dictionary with the content of kwds merged into the content of
323 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300324 */
325static PyObject*
326get_attrib_from_keywords(PyObject *kwds)
327{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700328 PyObject *attrib_str = PyUnicode_FromString("attrib");
329 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330
331 if (attrib) {
332 /* If attrib was found in kwds, copy its value and remove it from
333 * kwds
334 */
335 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700336 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
338 Py_TYPE(attrib)->tp_name);
339 return NULL;
340 }
341 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700342 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300343 } else {
344 attrib = PyDict_New();
345 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700346
347 Py_DECREF(attrib_str);
348
349 /* attrib can be NULL if PyDict_New failed */
350 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200351 if (PyDict_Update(attrib, kwds) < 0)
352 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300353 return attrib;
354}
355
Serhiy Storchakacb985562015-05-04 15:32:48 +0300356/*[clinic input]
357module _elementtree
358class _elementtree.Element "ElementObject *" "&Element_Type"
359class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
360class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
361[clinic start generated code]*/
362/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
363
Eli Bendersky092af1f2012-03-04 07:14:03 +0200364static int
365element_init(PyObject *self, PyObject *args, PyObject *kwds)
366{
367 PyObject *tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200368 PyObject *attrib = NULL;
369 ElementObject *self_elem;
370
371 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
372 return -1;
373
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 if (attrib) {
375 /* attrib passed as positional arg */
376 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200377 if (!attrib)
378 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300379 if (kwds) {
380 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200381 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 return -1;
383 }
384 }
385 } else if (kwds) {
386 /* have keywords args */
387 attrib = get_attrib_from_keywords(kwds);
388 if (!attrib)
389 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200390 }
391
392 self_elem = (ElementObject *)self;
393
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 return -1;
398 }
399 }
400
Eli Bendersky48d358b2012-05-30 17:57:50 +0300401 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403
404 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300406 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300409 _set_joined_ptr(&self_elem->text, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300412 _set_joined_ptr(&self_elem->tail, Py_None);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413
414 return 0;
415}
416
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200420 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000421 PyObject* *children;
422
423 /* make sure self->children can hold the given number of extra
424 elements. set an exception and return -1 if allocation failed */
425
Victor Stinner5f0af232013-07-11 23:01:36 +0200426 if (!self->extra) {
427 if (create_extra(self, NULL) < 0)
428 return -1;
429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200431 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432
433 if (size > self->extra->allocated) {
434 /* use Python 2.4's list growth strategy */
435 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000436 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100437 * which needs at least 4 bytes.
438 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000439 * be safe.
440 */
441 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
443 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100446 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 * false alarm always assume at least one child to be safe.
448 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 children = PyObject_Realloc(self->extra->children,
450 size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 } else {
454 children = PyObject_Malloc(size * sizeof(PyObject*));
455 if (!children)
456 goto nomemory;
457 /* copy existing children from static area to malloc buffer */
458 memcpy(children, self->extra->children,
459 self->extra->length * sizeof(PyObject*));
460 }
461 self->extra->children = children;
462 self->extra->allocated = size;
463 }
464
465 return 0;
466
467 nomemory:
468 PyErr_NoMemory();
469 return -1;
470}
471
472LOCAL(int)
473element_add_subelement(ElementObject* self, PyObject* element)
474{
475 /* add a child element to a parent */
476
477 if (element_resize(self, 1) < 0)
478 return -1;
479
480 Py_INCREF(element);
481 self->extra->children[self->extra->length] = element;
482
483 self->extra->length++;
484
485 return 0;
486}
487
488LOCAL(PyObject*)
489element_get_attrib(ElementObject* self)
490{
491 /* return borrowed reference to attrib dictionary */
492 /* note: this function assumes that the extra section exists */
493
494 PyObject* res = self->extra->attrib;
495
496 if (res == Py_None) {
497 /* create missing dictionary */
498 res = PyDict_New();
499 if (!res)
500 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200501 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000502 self->extra->attrib = res;
503 }
504
505 return res;
506}
507
508LOCAL(PyObject*)
509element_get_text(ElementObject* self)
510{
511 /* return borrowed reference to text attribute */
512
Serhiy Storchaka576def02017-03-30 09:47:31 +0300513 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000514
515 if (JOIN_GET(res)) {
516 res = JOIN_OBJ(res);
517 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300518 PyObject *tmp = list_join(res);
519 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300521 self->text = tmp;
522 Py_DECREF(res);
523 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000524 }
525 }
526
527 return res;
528}
529
530LOCAL(PyObject*)
531element_get_tail(ElementObject* self)
532{
533 /* return borrowed reference to text attribute */
534
Serhiy Storchaka576def02017-03-30 09:47:31 +0300535 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000536
537 if (JOIN_GET(res)) {
538 res = JOIN_OBJ(res);
539 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300540 PyObject *tmp = list_join(res);
541 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300543 self->tail = tmp;
544 Py_DECREF(res);
545 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000546 }
547 }
548
549 return res;
550}
551
552static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300553subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000554{
555 PyObject* elem;
556
557 ElementObject* parent;
558 PyObject* tag;
559 PyObject* attrib = NULL;
560 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
561 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800562 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800564 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565
Eli Bendersky737b1732012-05-29 06:02:56 +0300566 if (attrib) {
567 /* attrib passed as positional arg */
568 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000569 if (!attrib)
570 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300571 if (kwds) {
572 if (PyDict_Update(attrib, kwds) < 0) {
573 return NULL;
574 }
575 }
576 } else if (kwds) {
577 /* have keyword args */
578 attrib = get_attrib_from_keywords(kwds);
579 if (!attrib)
580 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000581 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300582 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 Py_INCREF(Py_None);
584 attrib = Py_None;
585 }
586
Eli Bendersky092af1f2012-03-04 07:14:03 +0200587 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200589 if (elem == NULL)
590 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000591
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000592 if (element_add_subelement(parent, elem) < 0) {
593 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000594 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000595 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596
597 return elem;
598}
599
Eli Bendersky0192ba32012-03-30 16:38:33 +0300600static int
601element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
602{
603 Py_VISIT(self->tag);
604 Py_VISIT(JOIN_OBJ(self->text));
605 Py_VISIT(JOIN_OBJ(self->tail));
606
607 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200608 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300609 Py_VISIT(self->extra->attrib);
610
611 for (i = 0; i < self->extra->length; ++i)
612 Py_VISIT(self->extra->children[i]);
613 }
614 return 0;
615}
616
617static int
618element_gc_clear(ElementObject *self)
619{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700621 _clear_joined_ptr(&self->text);
622 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300623
624 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300625 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300626 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300627 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 return 0;
629}
630
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000631static void
632element_dealloc(ElementObject* self)
633{
INADA Naokia6296d32017-08-24 14:55:17 +0900634 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200636 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300637
638 if (self->weakreflist != NULL)
639 PyObject_ClearWeakRefs((PyObject *) self);
640
Eli Bendersky0192ba32012-03-30 16:38:33 +0300641 /* element_gc_clear clears all references and deallocates extra
642 */
643 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
645 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200646 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200647 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000648}
649
650/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651
Serhiy Storchakacb985562015-05-04 15:32:48 +0300652/*[clinic input]
653_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000654
Serhiy Storchakacb985562015-05-04 15:32:48 +0300655 subelement: object(subclass_of='&Element_Type')
656 /
657
658[clinic start generated code]*/
659
660static PyObject *
661_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
662/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
663{
664 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665 return NULL;
666
667 Py_RETURN_NONE;
668}
669
Serhiy Storchakacb985562015-05-04 15:32:48 +0300670/*[clinic input]
671_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000672
Serhiy Storchakacb985562015-05-04 15:32:48 +0300673[clinic start generated code]*/
674
675static PyObject *
676_elementtree_Element_clear_impl(ElementObject *self)
677/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
678{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300679 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000680
681 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300682 _set_joined_ptr(&self->text, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000683
684 Py_INCREF(Py_None);
Oren Milman39ecb9c2017-10-10 23:26:24 +0300685 _set_joined_ptr(&self->tail, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000686
687 Py_RETURN_NONE;
688}
689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690/*[clinic input]
691_elementtree.Element.__copy__
692
693[clinic start generated code]*/
694
695static PyObject *
696_elementtree_Element___copy___impl(ElementObject *self)
697/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200699 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000700 ElementObject* element;
701
Eli Bendersky092af1f2012-03-04 07:14:03 +0200702 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800703 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000704 if (!element)
705 return NULL;
706
Oren Milman39ecb9c2017-10-10 23:26:24 +0300707 Py_INCREF(JOIN_OBJ(self->text));
708 _set_joined_ptr(&element->text, self->text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000709
Oren Milman39ecb9c2017-10-10 23:26:24 +0300710 Py_INCREF(JOIN_OBJ(self->tail));
711 _set_joined_ptr(&element->tail, self->tail);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000712
713 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 if (element_resize(element, self->extra->length) < 0) {
715 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718
719 for (i = 0; i < self->extra->length; i++) {
720 Py_INCREF(self->extra->children[i]);
721 element->extra->children[i] = self->extra->children[i];
722 }
723
724 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 }
726
727 return (PyObject*) element;
728}
729
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200730/* Helper for a deep copy. */
731LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
732
Serhiy Storchakacb985562015-05-04 15:32:48 +0300733/*[clinic input]
734_elementtree.Element.__deepcopy__
735
Oren Milmand0568182017-09-12 17:39:15 +0300736 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300737 /
738
739[clinic start generated code]*/
740
741static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300742_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
743/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200745 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 ElementObject* element;
747 PyObject* tag;
748 PyObject* attrib;
749 PyObject* text;
750 PyObject* tail;
751 PyObject* id;
752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 tag = deepcopy(self->tag, memo);
754 if (!tag)
755 return NULL;
756
757 if (self->extra) {
758 attrib = deepcopy(self->extra->attrib, memo);
759 if (!attrib) {
760 Py_DECREF(tag);
761 return NULL;
762 }
763 } else {
764 Py_INCREF(Py_None);
765 attrib = Py_None;
766 }
767
Eli Bendersky092af1f2012-03-04 07:14:03 +0200768 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769
770 Py_DECREF(tag);
771 Py_DECREF(attrib);
772
773 if (!element)
774 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100775
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000776 text = deepcopy(JOIN_OBJ(self->text), memo);
777 if (!text)
778 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300779 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000780
781 tail = deepcopy(JOIN_OBJ(self->tail), memo);
782 if (!tail)
783 goto error;
Oren Milman39ecb9c2017-10-10 23:26:24 +0300784 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000785
786 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787 if (element_resize(element, self->extra->length) < 0)
788 goto error;
789
790 for (i = 0; i < self->extra->length; i++) {
791 PyObject* child = deepcopy(self->extra->children[i], memo);
792 if (!child) {
793 element->extra->length = i;
794 goto error;
795 }
796 element->extra->children[i] = child;
797 }
798
799 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000800 }
801
802 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700803 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000804 if (!id)
805 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000806
807 i = PyDict_SetItem(memo, id, (PyObject*) element);
808
809 Py_DECREF(id);
810
811 if (i < 0)
812 goto error;
813
814 return (PyObject*) element;
815
816 error:
817 Py_DECREF(element);
818 return NULL;
819}
820
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200821LOCAL(PyObject *)
822deepcopy(PyObject *object, PyObject *memo)
823{
824 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200825 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200826 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200827
828 /* Fast paths */
829 if (object == Py_None || PyUnicode_CheckExact(object)) {
830 Py_INCREF(object);
831 return object;
832 }
833
834 if (Py_REFCNT(object) == 1) {
835 if (PyDict_CheckExact(object)) {
836 PyObject *key, *value;
837 Py_ssize_t pos = 0;
838 int simple = 1;
839 while (PyDict_Next(object, &pos, &key, &value)) {
840 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
841 simple = 0;
842 break;
843 }
844 }
845 if (simple)
846 return PyDict_Copy(object);
847 /* Fall through to general case */
848 }
849 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300850 return _elementtree_Element___deepcopy___impl(
851 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200852 }
853 }
854
855 /* General case */
856 st = ET_STATE_GLOBAL;
857 if (!st->deepcopy_obj) {
858 PyErr_SetString(PyExc_RuntimeError,
859 "deepcopy helper not found");
860 return NULL;
861 }
862
Victor Stinner7fbac452016-08-20 01:34:44 +0200863 stack[0] = object;
864 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200865 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200866}
867
868
Serhiy Storchakacb985562015-05-04 15:32:48 +0300869/*[clinic input]
870_elementtree.Element.__sizeof__ -> Py_ssize_t
871
872[clinic start generated code]*/
873
874static Py_ssize_t
875_elementtree_Element___sizeof___impl(ElementObject *self)
876/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200878 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879 if (self->extra) {
880 result += sizeof(ElementObjectExtra);
881 if (self->extra->children != self->extra->_children)
882 result += sizeof(PyObject*) * self->extra->allocated;
883 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200885}
886
Eli Bendersky698bdb22013-01-10 06:01:06 -0800887/* dict keys for getstate/setstate. */
888#define PICKLED_TAG "tag"
889#define PICKLED_CHILDREN "_children"
890#define PICKLED_ATTRIB "attrib"
891#define PICKLED_TAIL "tail"
892#define PICKLED_TEXT "text"
893
894/* __getstate__ returns a fabricated instance dict as in the pure-Python
895 * Element implementation, for interoperability/interchangeability. This
896 * makes the pure-Python implementation details an API, but (a) there aren't
897 * any unnecessary structures there; and (b) it buys compatibility with 3.2
898 * pickles. See issue #16076.
899 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300900/*[clinic input]
901_elementtree.Element.__getstate__
902
903[clinic start generated code]*/
904
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906_elementtree_Element___getstate___impl(ElementObject *self)
907/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200909 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910 PyObject *instancedict = NULL, *children;
911
912 /* Build a list of children. */
913 children = PyList_New(self->extra ? self->extra->length : 0);
914 if (!children)
915 return NULL;
916 for (i = 0; i < PyList_GET_SIZE(children); i++) {
917 PyObject *child = self->extra->children[i];
918 Py_INCREF(child);
919 PyList_SET_ITEM(children, i, child);
920 }
921
922 /* Construct the state object. */
923 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
924 if (noattrib)
925 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
926 PICKLED_TAG, self->tag,
927 PICKLED_CHILDREN, children,
928 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700929 PICKLED_TEXT, JOIN_OBJ(self->text),
930 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931 else
932 instancedict = Py_BuildValue("{sOsOsOsOsO}",
933 PICKLED_TAG, self->tag,
934 PICKLED_CHILDREN, children,
935 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700936 PICKLED_TEXT, JOIN_OBJ(self->text),
937 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800938 if (instancedict) {
939 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800941 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 else {
943 for (i = 0; i < PyList_GET_SIZE(children); i++)
944 Py_DECREF(PyList_GET_ITEM(children, i));
945 Py_DECREF(children);
946
947 return NULL;
948 }
949}
950
951static PyObject *
952element_setstate_from_attributes(ElementObject *self,
953 PyObject *tag,
954 PyObject *attrib,
955 PyObject *text,
956 PyObject *tail,
957 PyObject *children)
958{
959 Py_ssize_t i, nchildren;
960
961 if (!tag) {
962 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
963 return NULL;
964 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200966 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300967 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Oren Milman39ecb9c2017-10-10 23:26:24 +0300969 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
970 Py_INCREF(JOIN_OBJ(text));
971 _set_joined_ptr(&self->text, text);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
Oren Milman39ecb9c2017-10-10 23:26:24 +0300973 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
974 Py_INCREF(JOIN_OBJ(tail));
975 _set_joined_ptr(&self->tail, tail);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 /* Handle ATTRIB and CHILDREN. */
978 if (!children && !attrib)
979 Py_RETURN_NONE;
980
981 /* Compute 'nchildren'. */
982 if (children) {
983 if (!PyList_Check(children)) {
984 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
985 return NULL;
986 }
987 nchildren = PyList_Size(children);
988 }
989 else {
990 nchildren = 0;
991 }
992
993 /* Allocate 'extra'. */
994 if (element_resize(self, nchildren)) {
995 return NULL;
996 }
997 assert(self->extra && self->extra->allocated >= nchildren);
998
999 /* Copy children */
1000 for (i = 0; i < nchildren; i++) {
1001 self->extra->children[i] = PyList_GET_ITEM(children, i);
1002 Py_INCREF(self->extra->children[i]);
1003 }
1004
1005 self->extra->length = nchildren;
1006 self->extra->allocated = nchildren;
1007
1008 /* Stash attrib. */
1009 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001011 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 }
1013
1014 Py_RETURN_NONE;
1015}
1016
1017/* __setstate__ for Element instance from the Python implementation.
1018 * 'state' should be the instance dict.
1019 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021static PyObject *
1022element_setstate_from_Python(ElementObject *self, PyObject *state)
1023{
1024 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1025 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1026 PyObject *args;
1027 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030 tag = attrib = text = tail = children = NULL;
1031 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034
1035 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1036 &attrib, &text, &tail, &children))
1037 retval = element_setstate_from_attributes(self, tag, attrib, text,
1038 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001039 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001040 retval = NULL;
1041
1042 Py_DECREF(args);
1043 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044}
1045
Serhiy Storchakacb985562015-05-04 15:32:48 +03001046/*[clinic input]
1047_elementtree.Element.__setstate__
1048
1049 state: object
1050 /
1051
1052[clinic start generated code]*/
1053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1056/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057{
1058 if (!PyDict_CheckExact(state)) {
1059 PyErr_Format(PyExc_TypeError,
1060 "Don't know how to unpickle \"%.200R\" as an Element",
1061 state);
1062 return NULL;
1063 }
1064 else
1065 return element_setstate_from_Python(self, state);
1066}
1067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068LOCAL(int)
1069checkpath(PyObject* tag)
1070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 Py_ssize_t i;
1072 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 /* check if a tag contains an xpath character */
1075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076#define PATHCHAR(ch) \
1077 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1081 void *data = PyUnicode_DATA(tag);
1082 unsigned int kind = PyUnicode_KIND(tag);
1083 for (i = 0; i < len; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 return 1;
1091 }
1092 return 0;
1093 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001094 if (PyBytes_Check(tag)) {
1095 char *p = PyBytes_AS_STRING(tag);
1096 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (p[i] == '{')
1098 check = 0;
1099 else if (p[i] == '}')
1100 check = 1;
1101 else if (check && PATHCHAR(p[i]))
1102 return 1;
1103 }
1104 return 0;
1105 }
1106
1107 return 1; /* unknown type; might be path expression */
1108}
1109
Serhiy Storchakacb985562015-05-04 15:32:48 +03001110/*[clinic input]
1111_elementtree.Element.extend
1112
1113 elements: object
1114 /
1115
1116[clinic start generated code]*/
1117
1118static PyObject *
1119_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1120/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121{
1122 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 if (!seq) {
1127 PyErr_Format(
1128 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 );
1131 return NULL;
1132 }
1133
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 Py_INCREF(element);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001137 if (!Element_Check(element)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001138 PyErr_Format(
1139 PyExc_TypeError,
1140 "expected an Element, not \"%.200s\"",
1141 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001142 Py_DECREF(seq);
1143 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001144 return NULL;
1145 }
1146
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147 if (element_add_subelement(self, element) < 0) {
1148 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001149 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 return NULL;
1151 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001152 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001153 }
1154
1155 Py_DECREF(seq);
1156
1157 Py_RETURN_NONE;
1158}
1159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160/*[clinic input]
1161_elementtree.Element.find
1162
1163 path: object
1164 namespaces: object = None
1165
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1170 PyObject *namespaces)
1171/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001173 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001175
Serhiy Storchakacb985562015-05-04 15:32:48 +03001176 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001178 return _PyObject_CallMethodIdObjArgs(
1179 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001181 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182
1183 if (!self->extra)
1184 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001188 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001189 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 continue;
1191 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001192 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_DECREF(item);
1196 if (rc < 0)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 }
1199
1200 Py_RETURN_NONE;
1201}
1202
Serhiy Storchakacb985562015-05-04 15:32:48 +03001203/*[clinic input]
1204_elementtree.Element.findtext
1205
1206 path: object
1207 default: object = None
1208 namespaces: object = None
1209
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1214 PyObject *default_value,
1215 PyObject *namespaces)
1216/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001218 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001219 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001220 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001223 return _PyObject_CallMethodIdObjArgs(
1224 st->elementpath_obj, &PyId_findtext,
1225 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 );
1227
1228 if (!self->extra) {
1229 Py_INCREF(default_value);
1230 return default_value;
1231 }
1232
1233 for (i = 0; i < self->extra->length; i++) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001234 PyObject *item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001235 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001236 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001237 continue;
1238 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001239 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 if (rc > 0) {
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001241 PyObject* text = element_get_text((ElementObject*)item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 if (text == Py_None) {
1243 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001244 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001246 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248 return text;
1249 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001250 Py_DECREF(item);
1251 if (rc < 0)
1252 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253 }
1254
1255 Py_INCREF(default_value);
1256 return default_value;
1257}
1258
Serhiy Storchakacb985562015-05-04 15:32:48 +03001259/*[clinic input]
1260_elementtree.Element.findall
1261
1262 path: object
1263 namespaces: object = None
1264
1265[clinic start generated code]*/
1266
1267static PyObject *
1268_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1269 PyObject *namespaces)
1270/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001272 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 PyObject* out;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001274 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001276 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001277 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001278 return _PyObject_CallMethodIdObjArgs(
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001279 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001281 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282
1283 out = PyList_New(0);
1284 if (!out)
1285 return NULL;
1286
1287 if (!self->extra)
1288 return out;
1289
1290 for (i = 0; i < self->extra->length; i++) {
1291 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 int rc;
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001293 if (!Element_Check(item))
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001294 continue;
1295 Py_INCREF(item);
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07001296 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001297 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1298 Py_DECREF(item);
1299 Py_DECREF(out);
1300 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
1304
1305 return out;
1306}
1307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308/*[clinic input]
1309_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001310
Serhiy Storchakacb985562015-05-04 15:32:48 +03001311 path: object
1312 namespaces: object = None
1313
1314[clinic start generated code]*/
1315
1316static PyObject *
1317_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1318 PyObject *namespaces)
1319/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1320{
1321 PyObject* tag = path;
1322 _Py_IDENTIFIER(iterfind);
1323 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324
Victor Stinnerf5616342016-12-09 15:26:00 +01001325 return _PyObject_CallMethodIdObjArgs(
1326 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.get
1331
1332 key: object
1333 default: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1339 PyObject *default_value)
1340/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
1342 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 if (!self->extra || self->extra->attrib == Py_None)
1345 value = default_value;
1346 else {
1347 value = PyDict_GetItem(self->extra->attrib, key);
1348 if (!value)
1349 value = default_value;
1350 }
1351
1352 Py_INCREF(value);
1353 return value;
1354}
1355
Serhiy Storchakacb985562015-05-04 15:32:48 +03001356/*[clinic input]
1357_elementtree.Element.getchildren
1358
1359[clinic start generated code]*/
1360
1361static PyObject *
1362_elementtree_Element_getchildren_impl(ElementObject *self)
1363/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001364{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001365 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366 PyObject* list;
1367
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001368 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1369 "This method will be removed in future versions. "
1370 "Use 'list(elem)' or iteration over elem instead.",
1371 1) < 0) {
1372 return NULL;
1373 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001374
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001375 if (!self->extra)
1376 return PyList_New(0);
1377
1378 list = PyList_New(self->extra->length);
1379 if (!list)
1380 return NULL;
1381
1382 for (i = 0; i < self->extra->length; i++) {
1383 PyObject* item = self->extra->children[i];
1384 Py_INCREF(item);
1385 PyList_SET_ITEM(list, i, item);
1386 }
1387
1388 return list;
1389}
1390
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001391
Eli Bendersky64d11e62012-06-15 07:42:50 +03001392static PyObject *
1393create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1394
1395
Serhiy Storchakacb985562015-05-04 15:32:48 +03001396/*[clinic input]
1397_elementtree.Element.iter
1398
1399 tag: object = None
1400
1401[clinic start generated code]*/
1402
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001404_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1405/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001406{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001407 if (PyUnicode_Check(tag)) {
1408 if (PyUnicode_READY(tag) < 0)
1409 return NULL;
1410 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1411 tag = Py_None;
1412 }
1413 else if (PyBytes_Check(tag)) {
1414 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1415 tag = Py_None;
1416 }
1417
Eli Bendersky64d11e62012-06-15 07:42:50 +03001418 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001419}
1420
1421
Serhiy Storchakacb985562015-05-04 15:32:48 +03001422/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001423_elementtree.Element.getiterator
1424
1425 tag: object = None
1426
1427[clinic start generated code]*/
1428
1429static PyObject *
1430_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1431/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1432{
1433 /* Change for a DeprecationWarning in 1.4 */
1434 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1435 "This method will be removed in future versions. "
1436 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1437 1) < 0) {
1438 return NULL;
1439 }
1440 return _elementtree_Element_iter_impl(self, tag);
1441}
1442
1443
1444/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001445_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001446
Serhiy Storchakacb985562015-05-04 15:32:48 +03001447[clinic start generated code]*/
1448
1449static PyObject *
1450_elementtree_Element_itertext_impl(ElementObject *self)
1451/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1452{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001453 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001454}
1455
Eli Bendersky64d11e62012-06-15 07:42:50 +03001456
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001457static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001458element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001460 ElementObject* self = (ElementObject*) self_;
1461
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462 if (!self->extra || index < 0 || index >= self->extra->length) {
1463 PyErr_SetString(
1464 PyExc_IndexError,
1465 "child index out of range"
1466 );
1467 return NULL;
1468 }
1469
1470 Py_INCREF(self->extra->children[index]);
1471 return self->extra->children[index];
1472}
1473
Serhiy Storchakacb985562015-05-04 15:32:48 +03001474/*[clinic input]
1475_elementtree.Element.insert
1476
1477 index: Py_ssize_t
1478 subelement: object(subclass_of='&Element_Type')
1479 /
1480
1481[clinic start generated code]*/
1482
1483static PyObject *
1484_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1485 PyObject *subelement)
1486/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001487{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001488 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489
Victor Stinner5f0af232013-07-11 23:01:36 +02001490 if (!self->extra) {
1491 if (create_extra(self, NULL) < 0)
1492 return NULL;
1493 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001495 if (index < 0) {
1496 index += self->extra->length;
1497 if (index < 0)
1498 index = 0;
1499 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001500 if (index > self->extra->length)
1501 index = self->extra->length;
1502
1503 if (element_resize(self, 1) < 0)
1504 return NULL;
1505
1506 for (i = self->extra->length; i > index; i--)
1507 self->extra->children[i] = self->extra->children[i-1];
1508
Serhiy Storchakacb985562015-05-04 15:32:48 +03001509 Py_INCREF(subelement);
1510 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001511
1512 self->extra->length++;
1513
1514 Py_RETURN_NONE;
1515}
1516
Serhiy Storchakacb985562015-05-04 15:32:48 +03001517/*[clinic input]
1518_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519
Serhiy Storchakacb985562015-05-04 15:32:48 +03001520[clinic start generated code]*/
1521
1522static PyObject *
1523_elementtree_Element_items_impl(ElementObject *self)
1524/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1525{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526 if (!self->extra || self->extra->attrib == Py_None)
1527 return PyList_New(0);
1528
1529 return PyDict_Items(self->extra->attrib);
1530}
1531
Serhiy Storchakacb985562015-05-04 15:32:48 +03001532/*[clinic input]
1533_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534
Serhiy Storchakacb985562015-05-04 15:32:48 +03001535[clinic start generated code]*/
1536
1537static PyObject *
1538_elementtree_Element_keys_impl(ElementObject *self)
1539/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1540{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541 if (!self->extra || self->extra->attrib == Py_None)
1542 return PyList_New(0);
1543
1544 return PyDict_Keys(self->extra->attrib);
1545}
1546
Martin v. Löwis18e16552006-02-15 17:27:45 +00001547static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548element_length(ElementObject* self)
1549{
1550 if (!self->extra)
1551 return 0;
1552
1553 return self->extra->length;
1554}
1555
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556/*[clinic input]
1557_elementtree.Element.makeelement
1558
1559 tag: object
1560 attrib: object
1561 /
1562
1563[clinic start generated code]*/
1564
1565static PyObject *
1566_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1567 PyObject *attrib)
1568/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569{
1570 PyObject* elem;
1571
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 attrib = PyDict_Copy(attrib);
1573 if (!attrib)
1574 return NULL;
1575
Eli Bendersky092af1f2012-03-04 07:14:03 +02001576 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577
1578 Py_DECREF(attrib);
1579
1580 return elem;
1581}
1582
Serhiy Storchakacb985562015-05-04 15:32:48 +03001583/*[clinic input]
1584_elementtree.Element.remove
1585
1586 subelement: object(subclass_of='&Element_Type')
1587 /
1588
1589[clinic start generated code]*/
1590
1591static PyObject *
1592_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1593/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001595 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001596 int rc;
1597 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599 if (!self->extra) {
1600 /* element has no children, so raise exception */
1601 PyErr_SetString(
1602 PyExc_ValueError,
1603 "list.remove(x): x not in list"
1604 );
1605 return NULL;
1606 }
1607
1608 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001609 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001610 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001611 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001612 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001614 if (rc < 0)
1615 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 }
1617
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001618 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001619 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001620 PyErr_SetString(
1621 PyExc_ValueError,
1622 "list.remove(x): x not in list"
1623 );
1624 return NULL;
1625 }
1626
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001627 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628
1629 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630 for (; i < self->extra->length; i++)
1631 self->extra->children[i] = self->extra->children[i+1];
1632
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001633 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634 Py_RETURN_NONE;
1635}
1636
1637static PyObject*
1638element_repr(ElementObject* self)
1639{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001640 int status;
1641
1642 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001643 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001644
1645 status = Py_ReprEnter((PyObject *)self);
1646 if (status == 0) {
1647 PyObject *res;
1648 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1649 Py_ReprLeave((PyObject *)self);
1650 return res;
1651 }
1652 if (status > 0)
1653 PyErr_Format(PyExc_RuntimeError,
1654 "reentrant call inside %s.__repr__",
1655 Py_TYPE(self)->tp_name);
1656 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657}
1658
Serhiy Storchakacb985562015-05-04 15:32:48 +03001659/*[clinic input]
1660_elementtree.Element.set
1661
1662 key: object
1663 value: object
1664 /
1665
1666[clinic start generated code]*/
1667
1668static PyObject *
1669_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1670 PyObject *value)
1671/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001672{
1673 PyObject* attrib;
1674
Victor Stinner5f0af232013-07-11 23:01:36 +02001675 if (!self->extra) {
1676 if (create_extra(self, NULL) < 0)
1677 return NULL;
1678 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001679
1680 attrib = element_get_attrib(self);
1681 if (!attrib)
1682 return NULL;
1683
1684 if (PyDict_SetItem(attrib, key, value) < 0)
1685 return NULL;
1686
1687 Py_RETURN_NONE;
1688}
1689
1690static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001691element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001692{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001693 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001694 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695 PyObject* old;
1696
1697 if (!self->extra || index < 0 || index >= self->extra->length) {
1698 PyErr_SetString(
1699 PyExc_IndexError,
1700 "child assignment index out of range");
1701 return -1;
1702 }
1703
1704 old = self->extra->children[index];
1705
1706 if (item) {
1707 Py_INCREF(item);
1708 self->extra->children[index] = item;
1709 } else {
1710 self->extra->length--;
1711 for (i = index; i < self->extra->length; i++)
1712 self->extra->children[i] = self->extra->children[i+1];
1713 }
1714
1715 Py_DECREF(old);
1716
1717 return 0;
1718}
1719
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001720static PyObject*
1721element_subscr(PyObject* self_, PyObject* item)
1722{
1723 ElementObject* self = (ElementObject*) self_;
1724
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001725 if (PyIndex_Check(item)) {
1726 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001727
1728 if (i == -1 && PyErr_Occurred()) {
1729 return NULL;
1730 }
1731 if (i < 0 && self->extra)
1732 i += self->extra->length;
1733 return element_getitem(self_, i);
1734 }
1735 else if (PySlice_Check(item)) {
1736 Py_ssize_t start, stop, step, slicelen, cur, i;
1737 PyObject* list;
1738
1739 if (!self->extra)
1740 return PyList_New(0);
1741
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001742 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743 return NULL;
1744 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001745 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1746 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001747
1748 if (slicelen <= 0)
1749 return PyList_New(0);
1750 else {
1751 list = PyList_New(slicelen);
1752 if (!list)
1753 return NULL;
1754
1755 for (cur = start, i = 0; i < slicelen;
1756 cur += step, i++) {
1757 PyObject* item = self->extra->children[cur];
1758 Py_INCREF(item);
1759 PyList_SET_ITEM(list, i, item);
1760 }
1761
1762 return list;
1763 }
1764 }
1765 else {
1766 PyErr_SetString(PyExc_TypeError,
1767 "element indices must be integers");
1768 return NULL;
1769 }
1770}
1771
1772static int
1773element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1774{
1775 ElementObject* self = (ElementObject*) self_;
1776
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001777 if (PyIndex_Check(item)) {
1778 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779
1780 if (i == -1 && PyErr_Occurred()) {
1781 return -1;
1782 }
1783 if (i < 0 && self->extra)
1784 i += self->extra->length;
1785 return element_setitem(self_, i, value);
1786 }
1787 else if (PySlice_Check(item)) {
1788 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1789
1790 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001791 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001792
Victor Stinner5f0af232013-07-11 23:01:36 +02001793 if (!self->extra) {
1794 if (create_extra(self, NULL) < 0)
1795 return -1;
1796 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001797
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001798 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799 return -1;
1800 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001801 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1802 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
Eli Bendersky865756a2012-03-09 13:38:15 +02001804 if (value == NULL) {
1805 /* Delete slice */
1806 size_t cur;
1807 Py_ssize_t i;
1808
1809 if (slicelen <= 0)
1810 return 0;
1811
1812 /* Since we're deleting, the direction of the range doesn't matter,
1813 * so for simplicity make it always ascending.
1814 */
1815 if (step < 0) {
1816 stop = start + 1;
1817 start = stop + step * (slicelen - 1) - 1;
1818 step = -step;
1819 }
1820
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001821 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001822
1823 /* recycle is a list that will contain all the children
1824 * scheduled for removal.
1825 */
1826 if (!(recycle = PyList_New(slicelen))) {
1827 PyErr_NoMemory();
1828 return -1;
1829 }
1830
1831 /* This loop walks over all the children that have to be deleted,
1832 * with cur pointing at them. num_moved is the amount of children
1833 * until the next deleted child that have to be "shifted down" to
1834 * occupy the deleted's places.
1835 * Note that in the ith iteration, shifting is done i+i places down
1836 * because i children were already removed.
1837 */
1838 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1839 /* Compute how many children have to be moved, clipping at the
1840 * list end.
1841 */
1842 Py_ssize_t num_moved = step - 1;
1843 if (cur + step >= (size_t)self->extra->length) {
1844 num_moved = self->extra->length - cur - 1;
1845 }
1846
1847 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1848
1849 memmove(
1850 self->extra->children + cur - i,
1851 self->extra->children + cur + 1,
1852 num_moved * sizeof(PyObject *));
1853 }
1854
1855 /* Leftover "tail" after the last removed child */
1856 cur = start + (size_t)slicelen * step;
1857 if (cur < (size_t)self->extra->length) {
1858 memmove(
1859 self->extra->children + cur - slicelen,
1860 self->extra->children + cur,
1861 (self->extra->length - cur) * sizeof(PyObject *));
1862 }
1863
1864 self->extra->length -= slicelen;
1865
1866 /* Discard the recycle list with all the deleted sub-elements */
1867 Py_XDECREF(recycle);
1868 return 0;
1869 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001870
1871 /* A new slice is actually being assigned */
1872 seq = PySequence_Fast(value, "");
1873 if (!seq) {
1874 PyErr_Format(
1875 PyExc_TypeError,
1876 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1877 );
1878 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001879 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001880 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881
1882 if (step != 1 && newlen != slicelen)
1883 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001884 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001885 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001886 "attempt to assign sequence of size %zd "
1887 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001888 newlen, slicelen
1889 );
1890 return -1;
1891 }
1892
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001893 /* Resize before creating the recycle bin, to prevent refleaks. */
1894 if (newlen > slicelen) {
1895 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001896 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001897 return -1;
1898 }
1899 }
1900
1901 if (slicelen > 0) {
1902 /* to avoid recursive calls to this method (via decref), move
1903 old items to the recycle bin here, and get rid of them when
1904 we're done modifying the element */
1905 recycle = PyList_New(slicelen);
1906 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001907 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001908 return -1;
1909 }
1910 for (cur = start, i = 0; i < slicelen;
1911 cur += step, i++)
1912 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1913 }
1914
1915 if (newlen < slicelen) {
1916 /* delete slice */
1917 for (i = stop; i < self->extra->length; i++)
1918 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1919 } else if (newlen > slicelen) {
1920 /* insert slice */
1921 for (i = self->extra->length-1; i >= stop; i--)
1922 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1923 }
1924
1925 /* replace the slice */
1926 for (cur = start, i = 0; i < newlen;
1927 cur += step, i++) {
1928 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1929 Py_INCREF(element);
1930 self->extra->children[cur] = element;
1931 }
1932
1933 self->extra->length += newlen - slicelen;
1934
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001935 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001936
1937 /* discard the recycle bin, and everything in it */
1938 Py_XDECREF(recycle);
1939
1940 return 0;
1941 }
1942 else {
1943 PyErr_SetString(PyExc_TypeError,
1944 "element indices must be integers");
1945 return -1;
1946 }
1947}
1948
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001949static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001950element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001951{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001952 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001953 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001954 return res;
1955}
1956
Serhiy Storchakadde08152015-11-25 15:28:13 +02001957static PyObject*
1958element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001960 PyObject *res = element_get_text(self);
1961 Py_XINCREF(res);
1962 return res;
1963}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001964
Serhiy Storchakadde08152015-11-25 15:28:13 +02001965static PyObject*
1966element_tail_getter(ElementObject *self, void *closure)
1967{
1968 PyObject *res = element_get_tail(self);
1969 Py_XINCREF(res);
1970 return res;
1971}
1972
1973static PyObject*
1974element_attrib_getter(ElementObject *self, void *closure)
1975{
1976 PyObject *res;
1977 if (!self->extra) {
1978 if (create_extra(self, NULL) < 0)
1979 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001980 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001981 res = element_get_attrib(self);
1982 Py_XINCREF(res);
1983 return res;
1984}
Victor Stinner4d463432013-07-11 23:05:03 +02001985
/* Macro for setter validation: a setter receives V == NULL when the
 * attribute is being deleted, which is not supported.  In that case an
 * AttributeError is set and the *calling function* returns -1.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single
 * statement and can be used safely inside an unbraced if/else.
 * Use it with a trailing semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1994
Serhiy Storchakadde08152015-11-25 15:28:13 +02001995static int
1996element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1997{
1998 _VALIDATE_ATTR_VALUE(value);
1999 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002000 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002001 return 0;
2002}
2003
2004static int
2005element_text_setter(ElementObject *self, PyObject *value, void *closure)
2006{
2007 _VALIDATE_ATTR_VALUE(value);
2008 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002009 _set_joined_ptr(&self->text, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002010 return 0;
2011}
2012
2013static int
2014element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2015{
2016 _VALIDATE_ATTR_VALUE(value);
2017 Py_INCREF(value);
Oren Milman39ecb9c2017-10-10 23:26:24 +03002018 _set_joined_ptr(&self->tail, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002019 return 0;
2020}
2021
2022static int
2023element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2024{
2025 _VALIDATE_ATTR_VALUE(value);
2026 if (!self->extra) {
2027 if (create_extra(self, NULL) < 0)
2028 return -1;
2029 }
2030 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002031 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002032 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002033}
2034
2035static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002036 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002037 0, /* sq_concat */
2038 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002039 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002040 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002041 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002042 0,
2043};
2044
Eli Bendersky64d11e62012-06-15 07:42:50 +03002045/******************************* Element iterator ****************************/
2046
2047/* ElementIterObject represents the iteration state over an XML element in
2048 * pre-order traversal. To keep track of which sub-element should be returned
2049 * next, a stack of parents is maintained. This is a standard stack-based
2050 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002051 * The stack is managed using a continuous array.
2052 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002053 * the current one is exhausted, and the next child to examine in that parent.
2054 */
2055typedef struct ParentLocator_t {
2056 ElementObject *parent;
2057 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002058} ParentLocator;
2059
2060typedef struct {
2061 PyObject_HEAD
2062 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002063 Py_ssize_t parent_stack_used;
2064 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002065 ElementObject *root_element;
2066 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002067 int gettext;
2068} ElementIterObject;
2069
2070
2071static void
2072elementiter_dealloc(ElementIterObject *it)
2073{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002074 Py_ssize_t i = it->parent_stack_used;
2075 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002076 /* bpo-31095: UnTrack is needed before calling any callbacks */
2077 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002078 while (i--)
2079 Py_XDECREF(it->parent_stack[i].parent);
2080 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002081
2082 Py_XDECREF(it->sought_tag);
2083 Py_XDECREF(it->root_element);
2084
Eli Bendersky64d11e62012-06-15 07:42:50 +03002085 PyObject_GC_Del(it);
2086}
2087
2088static int
2089elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2090{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002091 Py_ssize_t i = it->parent_stack_used;
2092 while (i--)
2093 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002094
2095 Py_VISIT(it->root_element);
2096 Py_VISIT(it->sought_tag);
2097 return 0;
2098}
2099
2100/* Helper function for elementiter_next. Add a new parent to the parent stack.
2101 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002102static int
2103parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002104{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002105 ParentLocator *item;
2106
2107 if (it->parent_stack_used >= it->parent_stack_size) {
2108 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2109 ParentLocator *parent_stack = it->parent_stack;
2110 PyMem_Resize(parent_stack, ParentLocator, new_size);
2111 if (parent_stack == NULL)
2112 return -1;
2113 it->parent_stack = parent_stack;
2114 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002116 item = it->parent_stack + it->parent_stack_used++;
2117 Py_INCREF(parent);
2118 item->parent = parent;
2119 item->child_index = 0;
2120 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121}
2122
2123static PyObject *
2124elementiter_next(ElementIterObject *it)
2125{
2126 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002127 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002128 * A short note on gettext: this function serves both the iter() and
2129 * itertext() methods to avoid code duplication. However, there are a few
2130 * small differences in the way these iterations work. Namely:
2131 * - itertext() only yields text from nodes that have it, and continues
2132 * iterating when a node doesn't have text (so it doesn't return any
2133 * node like iter())
2134 * - itertext() also has to handle tail, after finishing with all the
2135 * children of a node.
2136 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002137 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002138 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002139 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002140
2141 while (1) {
2142 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002143 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002144 * iterator is exhausted.
2145 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002146 if (!it->parent_stack_used) {
2147 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002148 PyErr_SetNone(PyExc_StopIteration);
2149 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002150 }
2151
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002152 elem = it->root_element; /* steals a reference */
2153 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154 }
2155 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002156 /* See if there are children left to traverse in the current parent. If
2157 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002158 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002159 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2160 Py_ssize_t child_index = item->child_index;
2161 ElementObjectExtra *extra;
2162 elem = item->parent;
2163 extra = elem->extra;
2164 if (!extra || child_index >= extra->length) {
2165 it->parent_stack_used--;
2166 /* Note that extra condition on it->parent_stack_used here;
2167 * this is because itertext() is supposed to only return *inner*
2168 * text, not text following the element it began iteration with.
2169 */
2170 if (it->gettext && it->parent_stack_used) {
2171 text = element_get_tail(elem);
2172 goto gettext;
2173 }
2174 Py_DECREF(elem);
2175 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002176 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002177
Miss Islington (bot)b1c80032018-10-14 00:55:49 -07002178 if (!Element_Check(extra->children[child_index])) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002179 PyErr_Format(PyExc_AttributeError,
2180 "'%.100s' object has no attribute 'iter'",
2181 Py_TYPE(extra->children[child_index])->tp_name);
2182 return NULL;
2183 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002184 elem = (ElementObject *)extra->children[child_index];
2185 item->child_index++;
2186 Py_INCREF(elem);
2187 }
2188
2189 if (parent_stack_push_new(it, elem) < 0) {
2190 Py_DECREF(elem);
2191 PyErr_NoMemory();
2192 return NULL;
2193 }
2194 if (it->gettext) {
2195 text = element_get_text(elem);
2196 goto gettext;
2197 }
2198
2199 if (it->sought_tag == Py_None)
2200 return (PyObject *)elem;
2201
2202 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2203 if (rc > 0)
2204 return (PyObject *)elem;
2205
2206 Py_DECREF(elem);
2207 if (rc < 0)
2208 return NULL;
2209 continue;
2210
2211gettext:
2212 if (!text) {
2213 Py_DECREF(elem);
2214 return NULL;
2215 }
2216 if (text == Py_None) {
2217 Py_DECREF(elem);
2218 }
2219 else {
2220 Py_INCREF(text);
2221 Py_DECREF(elem);
2222 rc = PyObject_IsTrue(text);
2223 if (rc > 0)
2224 return text;
2225 Py_DECREF(text);
2226 if (rc < 0)
2227 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002228 }
2229 }
2230
2231 return NULL;
2232}
2233
2234
2235static PyTypeObject ElementIter_Type = {
2236 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002237 /* Using the module's name since the pure-Python implementation does not
2238 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002239 "_elementtree._element_iterator", /* tp_name */
2240 sizeof(ElementIterObject), /* tp_basicsize */
2241 0, /* tp_itemsize */
2242 /* methods */
2243 (destructor)elementiter_dealloc, /* tp_dealloc */
2244 0, /* tp_print */
2245 0, /* tp_getattr */
2246 0, /* tp_setattr */
2247 0, /* tp_reserved */
2248 0, /* tp_repr */
2249 0, /* tp_as_number */
2250 0, /* tp_as_sequence */
2251 0, /* tp_as_mapping */
2252 0, /* tp_hash */
2253 0, /* tp_call */
2254 0, /* tp_str */
2255 0, /* tp_getattro */
2256 0, /* tp_setattro */
2257 0, /* tp_as_buffer */
2258 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2259 0, /* tp_doc */
2260 (traverseproc)elementiter_traverse, /* tp_traverse */
2261 0, /* tp_clear */
2262 0, /* tp_richcompare */
2263 0, /* tp_weaklistoffset */
2264 PyObject_SelfIter, /* tp_iter */
2265 (iternextfunc)elementiter_next, /* tp_iternext */
2266 0, /* tp_methods */
2267 0, /* tp_members */
2268 0, /* tp_getset */
2269 0, /* tp_base */
2270 0, /* tp_dict */
2271 0, /* tp_descr_get */
2272 0, /* tp_descr_set */
2273 0, /* tp_dictoffset */
2274 0, /* tp_init */
2275 0, /* tp_alloc */
2276 0, /* tp_new */
2277};
2278
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002279#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002280
2281static PyObject *
2282create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2283{
2284 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002285
2286 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2287 if (!it)
2288 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002289
Victor Stinner4d463432013-07-11 23:05:03 +02002290 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002291 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002292 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002293 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 it->root_element = self;
2295
Eli Bendersky64d11e62012-06-15 07:42:50 +03002296 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002297
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002298 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002299 if (it->parent_stack == NULL) {
2300 Py_DECREF(it);
2301 PyErr_NoMemory();
2302 return NULL;
2303 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002304 it->parent_stack_used = 0;
2305 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002306
Eli Bendersky64d11e62012-06-15 07:42:50 +03002307 return (PyObject *)it;
2308}
2309
2310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002311/* ==================================================================== */
2312/* the tree builder type */
2313
2314typedef struct {
2315 PyObject_HEAD
2316
Eli Bendersky58d548d2012-05-29 15:45:16 +03002317 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002318
Antoine Pitrouee329312012-10-04 19:53:29 +02002319 PyObject *this; /* current node */
2320 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323
Eli Bendersky58d548d2012-05-29 15:45:16 +03002324 PyObject *stack; /* element stack */
2325 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002326
Eli Bendersky48d358b2012-05-30 17:57:50 +03002327 PyObject *element_factory;
2328
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002329 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002330 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002331 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2332 PyObject *end_event_obj;
2333 PyObject *start_ns_event_obj;
2334 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335} TreeBuilderObject;
2336
/* True when op is an instance of exactly TreeBuilder_Type (subclasses do
   not qualify). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338
2339/* -------------------------------------------------------------------- */
2340/* constructor and destructor */
2341
Eli Bendersky58d548d2012-05-29 15:45:16 +03002342static PyObject *
2343treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002344{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002345 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2346 if (t != NULL) {
2347 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002348
Eli Bendersky58d548d2012-05-29 15:45:16 +03002349 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002350 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002351 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002352 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002353
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002355 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002356 t->stack = PyList_New(20);
2357 if (!t->stack) {
2358 Py_DECREF(t->this);
2359 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002360 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002361 return NULL;
2362 }
2363 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002364
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002365 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002366 t->start_event_obj = t->end_event_obj = NULL;
2367 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2368 }
2369 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370}
2371
Serhiy Storchakacb985562015-05-04 15:32:48 +03002372/*[clinic input]
2373_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002374
Serhiy Storchakacb985562015-05-04 15:32:48 +03002375 element_factory: object = NULL
2376
2377[clinic start generated code]*/
2378
2379static int
2380_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2381 PyObject *element_factory)
2382/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2383{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002384 if (element_factory) {
2385 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002386 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002387 }
2388
Eli Bendersky58d548d2012-05-29 15:45:16 +03002389 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002390}
2391
Eli Bendersky48d358b2012-05-30 17:57:50 +03002392static int
2393treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2394{
2395 Py_VISIT(self->root);
2396 Py_VISIT(self->this);
2397 Py_VISIT(self->last);
2398 Py_VISIT(self->data);
2399 Py_VISIT(self->stack);
2400 Py_VISIT(self->element_factory);
2401 return 0;
2402}
2403
2404static int
2405treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002406{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002407 Py_CLEAR(self->end_ns_event_obj);
2408 Py_CLEAR(self->start_ns_event_obj);
2409 Py_CLEAR(self->end_event_obj);
2410 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002411 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002412 Py_CLEAR(self->stack);
2413 Py_CLEAR(self->data);
2414 Py_CLEAR(self->last);
2415 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002416 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002417 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002418 return 0;
2419}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002420
Eli Bendersky48d358b2012-05-30 17:57:50 +03002421static void
2422treebuilder_dealloc(TreeBuilderObject *self)
2423{
2424 PyObject_GC_UnTrack(self);
2425 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002426 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002427}
2428
2429/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002430/* helpers for handling of arbitrary element-like objects */
2431
2432static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002433treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002434 PyObject **dest, _Py_Identifier *name)
2435{
2436 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002437 PyObject *tmp = JOIN_OBJ(*dest);
2438 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2439 *data = NULL;
2440 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002441 return 0;
2442 }
2443 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002444 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002445 int r;
2446 if (joined == NULL)
2447 return -1;
2448 r = _PyObject_SetAttrId(element, name, joined);
2449 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002450 if (r < 0)
2451 return -1;
2452 Py_CLEAR(*data);
2453 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002454 }
2455}
2456
Serhiy Storchaka576def02017-03-30 09:47:31 +03002457LOCAL(int)
2458treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002459{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002460 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002461
Serhiy Storchaka576def02017-03-30 09:47:31 +03002462 if (!self->data) {
2463 return 0;
2464 }
2465
2466 if (self->this == element) {
2467 _Py_IDENTIFIER(text);
2468 return treebuilder_set_element_text_or_tail(
2469 element, &self->data,
2470 &((ElementObject *) element)->text, &PyId_text);
2471 }
2472 else {
2473 _Py_IDENTIFIER(tail);
2474 return treebuilder_set_element_text_or_tail(
2475 element, &self->data,
2476 &((ElementObject *) element)->tail, &PyId_tail);
2477 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002478}
2479
2480static int
2481treebuilder_add_subelement(PyObject *element, PyObject *child)
2482{
2483 _Py_IDENTIFIER(append);
2484 if (Element_CheckExact(element)) {
2485 ElementObject *elem = (ElementObject *) element;
2486 return element_add_subelement(elem, child);
2487 }
2488 else {
2489 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002490 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002491 if (res == NULL)
2492 return -1;
2493 Py_DECREF(res);
2494 return 0;
2495 }
2496}
2497
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002498LOCAL(int)
2499treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2500 PyObject *node)
2501{
2502 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002503 PyObject *res;
2504 PyObject *event = PyTuple_Pack(2, action, node);
2505 if (event == NULL)
2506 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002507 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002508 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002509 if (res == NULL)
2510 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002511 Py_DECREF(res);
2512 }
2513 return 0;
2514}
2515
Antoine Pitrouee329312012-10-04 19:53:29 +02002516/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517/* handlers */
2518
2519LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2521 PyObject* attrib)
2522{
2523 PyObject* node;
2524 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002525 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526
Serhiy Storchaka576def02017-03-30 09:47:31 +03002527 if (treebuilder_flush_data(self) < 0) {
2528 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 }
2530
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002531 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002532 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002533 } else if (attrib == Py_None) {
2534 attrib = PyDict_New();
2535 if (!attrib)
2536 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002537 node = PyObject_CallFunctionObjArgs(self->element_factory,
2538 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002539 Py_DECREF(attrib);
2540 }
2541 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002542 node = PyObject_CallFunctionObjArgs(self->element_factory,
2543 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002544 }
2545 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002547 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548
Antoine Pitrouee329312012-10-04 19:53:29 +02002549 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550
2551 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002552 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002553 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 } else {
2555 if (self->root) {
2556 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002557 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 "multiple elements on top level"
2559 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002560 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 }
2562 Py_INCREF(node);
2563 self->root = node;
2564 }
2565
2566 if (self->index < PyList_GET_SIZE(self->stack)) {
2567 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002568 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002569 Py_INCREF(this);
2570 } else {
2571 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002572 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 }
2574 self->index++;
2575
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002577 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002579 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002581 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2582 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583
2584 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002585
2586 error:
2587 Py_DECREF(node);
2588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589}
2590
2591LOCAL(PyObject*)
2592treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2593{
2594 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002595 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002596 /* ignore calls to data before the first call to start */
2597 Py_RETURN_NONE;
2598 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 /* store the first item as is */
2600 Py_INCREF(data); self->data = data;
2601 } else {
2602 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002603 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2604 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002605 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002606 /* expat often generates single character data sections; handle
2607 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002608 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2609 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002611 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 } else if (PyList_CheckExact(self->data)) {
2613 if (PyList_Append(self->data, data) < 0)
2614 return NULL;
2615 } else {
2616 PyObject* list = PyList_New(2);
2617 if (!list)
2618 return NULL;
2619 PyList_SET_ITEM(list, 0, self->data);
2620 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2621 self->data = list;
2622 }
2623 }
2624
2625 Py_RETURN_NONE;
2626}
2627
2628LOCAL(PyObject*)
2629treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2630{
2631 PyObject* item;
2632
Serhiy Storchaka576def02017-03-30 09:47:31 +03002633 if (treebuilder_flush_data(self) < 0) {
2634 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635 }
2636
2637 if (self->index == 0) {
2638 PyErr_SetString(
2639 PyExc_IndexError,
2640 "pop from empty stack"
2641 );
2642 return NULL;
2643 }
2644
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002645 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002646 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002647 self->index--;
2648 self->this = PyList_GET_ITEM(self->stack, self->index);
2649 Py_INCREF(self->this);
2650 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002652 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2653 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654
2655 Py_INCREF(self->last);
2656 return (PyObject*) self->last;
2657}
2658
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659/* -------------------------------------------------------------------- */
2660/* methods (in alphabetical order) */
2661
Serhiy Storchakacb985562015-05-04 15:32:48 +03002662/*[clinic input]
2663_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664
Serhiy Storchakacb985562015-05-04 15:32:48 +03002665 data: object
2666 /
2667
2668[clinic start generated code]*/
2669
2670static PyObject *
2671_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2672/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2673{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002674 return treebuilder_handle_data(self, data);
2675}
2676
Serhiy Storchakacb985562015-05-04 15:32:48 +03002677/*[clinic input]
2678_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679
Serhiy Storchakacb985562015-05-04 15:32:48 +03002680 tag: object
2681 /
2682
2683[clinic start generated code]*/
2684
2685static PyObject *
2686_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2687/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2688{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002689 return treebuilder_handle_end(self, tag);
2690}
2691
2692LOCAL(PyObject*)
2693treebuilder_done(TreeBuilderObject* self)
2694{
2695 PyObject* res;
2696
2697 /* FIXME: check stack size? */
2698
2699 if (self->root)
2700 res = self->root;
2701 else
2702 res = Py_None;
2703
2704 Py_INCREF(res);
2705 return res;
2706}
2707
Serhiy Storchakacb985562015-05-04 15:32:48 +03002708/*[clinic input]
2709_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710
Serhiy Storchakacb985562015-05-04 15:32:48 +03002711[clinic start generated code]*/
2712
2713static PyObject *
2714_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2715/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2716{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717 return treebuilder_done(self);
2718}
2719
Serhiy Storchakacb985562015-05-04 15:32:48 +03002720/*[clinic input]
2721_elementtree.TreeBuilder.start
2722
2723 tag: object
2724 attrs: object = None
2725 /
2726
2727[clinic start generated code]*/
2728
2729static PyObject *
2730_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2731 PyObject *attrs)
2732/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002734 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735}
2736
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737/* ==================================================================== */
2738/* the expat interface */
2739
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002742
2743/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2744 * cached globally without being in per-module state.
2745 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002746static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748
Eli Bendersky52467b12012-06-01 07:13:08 +03002749static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2750 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752typedef struct {
2753 PyObject_HEAD
2754
2755 XML_Parser parser;
2756
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002757 PyObject *target;
2758 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002760 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002762 PyObject *handle_start;
2763 PyObject *handle_data;
2764 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002766 PyObject *handle_comment;
2767 PyObject *handle_pi;
2768 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002770 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002771
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772} XMLParserObject;
2773
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002774static PyObject*
Serhiy Storchakaa5552f02017-12-15 13:11:11 +02002775_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *const *args, Py_ssize_t nargs);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002776static PyObject *
2777_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2778 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002779
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780/* helpers */
2781
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782LOCAL(PyObject*)
2783makeuniversal(XMLParserObject* self, const char* string)
2784{
2785 /* convert a UTF-8 tag/attribute name from the expat parser
2786 to a universal name string */
2787
Antoine Pitrouc1948842012-10-01 23:40:37 +02002788 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 PyObject* key;
2790 PyObject* value;
2791
2792 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002793 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 if (!key)
2795 return NULL;
2796
2797 value = PyDict_GetItem(self->names, key);
2798
2799 if (value) {
2800 Py_INCREF(value);
2801 } else {
2802 /* new name. convert to universal name, and decode as
2803 necessary */
2804
2805 PyObject* tag;
2806 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002807 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002808
2809 /* look for namespace separator */
2810 for (i = 0; i < size; i++)
2811 if (string[i] == '}')
2812 break;
2813 if (i != size) {
2814 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002815 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002816 if (tag == NULL) {
2817 Py_DECREF(key);
2818 return NULL;
2819 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002820 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821 p[0] = '{';
2822 memcpy(p+1, string, size);
2823 size++;
2824 } else {
2825 /* plain name; use key as tag */
2826 Py_INCREF(key);
2827 tag = key;
2828 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002829
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002830 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002831 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002832 value = PyUnicode_DecodeUTF8(p, size, "strict");
2833 Py_DECREF(tag);
2834 if (!value) {
2835 Py_DECREF(key);
2836 return NULL;
2837 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002838
2839 /* add to names dictionary */
2840 if (PyDict_SetItem(self->names, key, value) < 0) {
2841 Py_DECREF(key);
2842 Py_DECREF(value);
2843 return NULL;
2844 }
2845 }
2846
2847 Py_DECREF(key);
2848 return value;
2849}
2850
Eli Bendersky5b77d812012-03-16 08:20:05 +02002851/* Set the ParseError exception with the given parameters.
2852 * If message is not NULL, it's used as the error string. Otherwise, the
2853 * message string is the default for the given error_code.
2854*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002855static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002856expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2857 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002858{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002859 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002860 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002861
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002862 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002863 message ? message : EXPAT(ErrorString)(error_code),
2864 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002865 if (errmsg == NULL)
2866 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002867
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002868 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002869 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002870 if (!error)
2871 return;
2872
Eli Bendersky5b77d812012-03-16 08:20:05 +02002873 /* Add code and position attributes */
2874 code = PyLong_FromLong((long)error_code);
2875 if (!code) {
2876 Py_DECREF(error);
2877 return;
2878 }
2879 if (PyObject_SetAttrString(error, "code", code) == -1) {
2880 Py_DECREF(error);
2881 Py_DECREF(code);
2882 return;
2883 }
2884 Py_DECREF(code);
2885
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002886 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002887 if (!position) {
2888 Py_DECREF(error);
2889 return;
2890 }
2891 if (PyObject_SetAttrString(error, "position", position) == -1) {
2892 Py_DECREF(error);
2893 Py_DECREF(position);
2894 return;
2895 }
2896 Py_DECREF(position);
2897
Eli Bendersky532d03e2013-08-10 08:00:39 -07002898 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002899 Py_DECREF(error);
2900}
2901
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902/* -------------------------------------------------------------------- */
2903/* handlers */
2904
2905static void
2906expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2907 int data_len)
2908{
2909 PyObject* key;
2910 PyObject* value;
2911 PyObject* res;
2912
2913 if (data_len < 2 || data_in[0] != '&')
2914 return;
2915
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002916 if (PyErr_Occurred())
2917 return;
2918
Neal Norwitz0269b912007-08-08 06:56:02 +00002919 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002920 if (!key)
2921 return;
2922
2923 value = PyDict_GetItem(self->entity, key);
2924
2925 if (value) {
2926 if (TreeBuilder_CheckExact(self->target))
2927 res = treebuilder_handle_data(
2928 (TreeBuilderObject*) self->target, value
2929 );
2930 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002931 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932 else
2933 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002935 } else if (!PyErr_Occurred()) {
2936 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002937 char message[128] = "undefined entity ";
2938 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002939 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002940 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002942 EXPAT(GetErrorColumnNumber)(self->parser),
2943 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 );
2945 }
2946
2947 Py_DECREF(key);
2948}
2949
2950static void
2951expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2952 const XML_Char **attrib_in)
2953{
2954 PyObject* res;
2955 PyObject* tag;
2956 PyObject* attrib;
2957 int ok;
2958
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002959 if (PyErr_Occurred())
2960 return;
2961
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 /* tag name */
2963 tag = makeuniversal(self, tag_in);
2964 if (!tag)
2965 return; /* parser will look for errors */
2966
2967 /* attributes */
2968 if (attrib_in[0]) {
2969 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002970 if (!attrib) {
2971 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002973 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 while (attrib_in[0] && attrib_in[1]) {
2975 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002976 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 if (!key || !value) {
2978 Py_XDECREF(value);
2979 Py_XDECREF(key);
2980 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002981 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982 return;
2983 }
2984 ok = PyDict_SetItem(attrib, key, value);
2985 Py_DECREF(value);
2986 Py_DECREF(key);
2987 if (ok < 0) {
2988 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002989 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002990 return;
2991 }
2992 attrib_in += 2;
2993 }
2994 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002995 Py_INCREF(Py_None);
2996 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002997 }
2998
2999 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003000 /* shortcut */
3001 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3002 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003003 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003004 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003005 if (attrib == Py_None) {
3006 Py_DECREF(attrib);
3007 attrib = PyDict_New();
3008 if (!attrib) {
3009 Py_DECREF(tag);
3010 return;
3011 }
3012 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003013 res = PyObject_CallFunctionObjArgs(self->handle_start,
3014 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003015 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 res = NULL;
3017
3018 Py_DECREF(tag);
3019 Py_DECREF(attrib);
3020
3021 Py_XDECREF(res);
3022}
3023
3024static void
3025expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3026 int data_len)
3027{
3028 PyObject* data;
3029 PyObject* res;
3030
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003031 if (PyErr_Occurred())
3032 return;
3033
Neal Norwitz0269b912007-08-08 06:56:02 +00003034 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003035 if (!data)
3036 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037
3038 if (TreeBuilder_CheckExact(self->target))
3039 /* shortcut */
3040 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3041 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003042 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003043 else
3044 res = NULL;
3045
3046 Py_DECREF(data);
3047
3048 Py_XDECREF(res);
3049}
3050
3051static void
3052expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3053{
3054 PyObject* tag;
3055 PyObject* res = NULL;
3056
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003057 if (PyErr_Occurred())
3058 return;
3059
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 if (TreeBuilder_CheckExact(self->target))
3061 /* shortcut */
3062 /* the standard tree builder doesn't look at the end tag */
3063 res = treebuilder_handle_end(
3064 (TreeBuilderObject*) self->target, Py_None
3065 );
3066 else if (self->handle_end) {
3067 tag = makeuniversal(self, tag_in);
3068 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003069 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070 Py_DECREF(tag);
3071 }
3072 }
3073
3074 Py_XDECREF(res);
3075}
3076
3077static void
3078expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3079 const XML_Char *uri)
3080{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003081 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3082 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003083
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003084 if (PyErr_Occurred())
3085 return;
3086
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003087 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003088 return;
3089
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003090 if (!uri)
3091 uri = "";
3092 if (!prefix)
3093 prefix = "";
3094
3095 parcel = Py_BuildValue("ss", prefix, uri);
3096 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003097 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003098 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3099 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100}
3101
3102static void
3103expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3104{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003105 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3106
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003107 if (PyErr_Occurred())
3108 return;
3109
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003110 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003111 return;
3112
3113 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114}
3115
3116static void
3117expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3118{
3119 PyObject* comment;
3120 PyObject* res;
3121
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003122 if (PyErr_Occurred())
3123 return;
3124
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003125 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003126 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003128 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3129 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003130 Py_XDECREF(res);
3131 Py_DECREF(comment);
3132 }
3133 }
3134}
3135
Eli Bendersky45839902013-01-13 05:14:47 -08003136static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003137expat_start_doctype_handler(XMLParserObject *self,
3138 const XML_Char *doctype_name,
3139 const XML_Char *sysid,
3140 const XML_Char *pubid,
3141 int has_internal_subset)
3142{
3143 PyObject *self_pyobj = (PyObject *)self;
3144 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3145 PyObject *parser_doctype = NULL;
3146 PyObject *res = NULL;
3147
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003148 if (PyErr_Occurred())
3149 return;
3150
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003151 doctype_name_obj = makeuniversal(self, doctype_name);
3152 if (!doctype_name_obj)
3153 return;
3154
3155 if (sysid) {
3156 sysid_obj = makeuniversal(self, sysid);
3157 if (!sysid_obj) {
3158 Py_DECREF(doctype_name_obj);
3159 return;
3160 }
3161 } else {
3162 Py_INCREF(Py_None);
3163 sysid_obj = Py_None;
3164 }
3165
3166 if (pubid) {
3167 pubid_obj = makeuniversal(self, pubid);
3168 if (!pubid_obj) {
3169 Py_DECREF(doctype_name_obj);
3170 Py_DECREF(sysid_obj);
3171 return;
3172 }
3173 } else {
3174 Py_INCREF(Py_None);
3175 pubid_obj = Py_None;
3176 }
3177
3178 /* If the target has a handler for doctype, call it. */
3179 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003180 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3181 doctype_name_obj, pubid_obj,
3182 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003183 Py_CLEAR(res);
3184 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003185 else {
3186 /* Now see if the parser itself has a doctype method. If yes and it's
3187 * a custom method, call it but warn about deprecation. If it's only
3188 * the vanilla XMLParser method, do nothing.
3189 */
3190 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3191 if (parser_doctype &&
3192 !(PyCFunction_Check(parser_doctype) &&
3193 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3194 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003195 (PyCFunction) _elementtree_XMLParser_doctype)) {
3196 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3197 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003198 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003199 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003200 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003201 res = PyObject_CallFunctionObjArgs(parser_doctype,
3202 doctype_name_obj, pubid_obj,
3203 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003204 Py_CLEAR(res);
3205 }
3206 }
3207
3208clear:
3209 Py_XDECREF(parser_doctype);
3210 Py_DECREF(doctype_name_obj);
3211 Py_DECREF(pubid_obj);
3212 Py_DECREF(sysid_obj);
3213}
3214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215static void
3216expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3217 const XML_Char* data_in)
3218{
3219 PyObject* target;
3220 PyObject* data;
3221 PyObject* res;
3222
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003223 if (PyErr_Occurred())
3224 return;
3225
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003227 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3228 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003230 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3231 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232 Py_XDECREF(res);
3233 Py_DECREF(data);
3234 Py_DECREF(target);
3235 } else {
3236 Py_XDECREF(data);
3237 Py_XDECREF(target);
3238 }
3239 }
3240}
3241
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243
Eli Bendersky52467b12012-06-01 07:13:08 +03003244static PyObject *
3245xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246{
Eli Bendersky52467b12012-06-01 07:13:08 +03003247 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3248 if (self) {
3249 self->parser = NULL;
3250 self->target = self->entity = self->names = NULL;
3251 self->handle_start = self->handle_data = self->handle_end = NULL;
3252 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003253 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003255 return (PyObject *)self;
3256}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257
scoderc8d8e152017-09-14 22:00:03 +02003258static int
3259ignore_attribute_error(PyObject *value)
3260{
3261 if (value == NULL) {
3262 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3263 return -1;
3264 }
3265 PyErr_Clear();
3266 }
3267 return 0;
3268}
3269
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270/*[clinic input]
3271_elementtree.XMLParser.__init__
3272
3273 html: object = NULL
3274 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003275 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003276
3277[clinic start generated code]*/
3278
Eli Bendersky52467b12012-06-01 07:13:08 +03003279static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003280_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3281 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003282/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003283{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003284 if (html != NULL) {
3285 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3286 "The html argument of XMLParser() is deprecated",
3287 1) < 0) {
3288 return -1;
3289 }
3290 }
3291
Serhiy Storchakacb985562015-05-04 15:32:48 +03003292 self->entity = PyDict_New();
3293 if (!self->entity)
3294 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 self->names = PyDict_New();
3297 if (!self->names) {
3298 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003299 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003301
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3303 if (!self->parser) {
3304 Py_CLEAR(self->entity);
3305 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003307 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003308 }
Miss Islington (bot)470a4352018-09-18 06:11:09 -07003309 /* expat < 2.1.0 has no XML_SetHashSalt() */
3310 if (EXPAT(SetHashSalt) != NULL) {
3311 EXPAT(SetHashSalt)(self->parser,
3312 (unsigned long)_Py_HashSecret.expat.hashsalt);
3313 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314
Eli Bendersky52467b12012-06-01 07:13:08 +03003315 if (target) {
3316 Py_INCREF(target);
3317 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003318 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 Py_CLEAR(self->entity);
3321 Py_CLEAR(self->names);
3322 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003323 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003325 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327
Serhiy Storchakacb985562015-05-04 15:32:48 +03003328 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003329 if (ignore_attribute_error(self->handle_start)) {
3330 return -1;
3331 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003332 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003333 if (ignore_attribute_error(self->handle_data)) {
3334 return -1;
3335 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003336 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003337 if (ignore_attribute_error(self->handle_end)) {
3338 return -1;
3339 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003340 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003341 if (ignore_attribute_error(self->handle_comment)) {
3342 return -1;
3343 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003344 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003345 if (ignore_attribute_error(self->handle_pi)) {
3346 return -1;
3347 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003348 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003349 if (ignore_attribute_error(self->handle_close)) {
3350 return -1;
3351 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003352 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003353 if (ignore_attribute_error(self->handle_doctype)) {
3354 return -1;
3355 }
Eli Bendersky45839902013-01-13 05:14:47 -08003356
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003358 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003360 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003361 (XML_StartElementHandler) expat_start_handler,
3362 (XML_EndElementHandler) expat_end_handler
3363 );
3364 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003365 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366 (XML_DefaultHandler) expat_default_handler
3367 );
3368 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 (XML_CharacterDataHandler) expat_data_handler
3371 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003372 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003374 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375 (XML_CommentHandler) expat_comment_handler
3376 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003379 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 (XML_ProcessingInstructionHandler) expat_pi_handler
3381 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003382 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003383 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003384 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3385 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003387 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003388 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390
Eli Bendersky52467b12012-06-01 07:13:08 +03003391 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392}
3393
Eli Bendersky52467b12012-06-01 07:13:08 +03003394static int
3395xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3396{
3397 Py_VISIT(self->handle_close);
3398 Py_VISIT(self->handle_pi);
3399 Py_VISIT(self->handle_comment);
3400 Py_VISIT(self->handle_end);
3401 Py_VISIT(self->handle_data);
3402 Py_VISIT(self->handle_start);
3403
3404 Py_VISIT(self->target);
3405 Py_VISIT(self->entity);
3406 Py_VISIT(self->names);
3407
3408 return 0;
3409}
3410
3411static int
3412xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413{
Victor Stinnere727d412017-09-18 05:29:37 -07003414 if (self->parser != NULL) {
3415 XML_Parser parser = self->parser;
3416 self->parser = NULL;
3417 EXPAT(ParserFree)(parser);
3418 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419
Antoine Pitrouc1948842012-10-01 23:40:37 +02003420 Py_CLEAR(self->handle_close);
3421 Py_CLEAR(self->handle_pi);
3422 Py_CLEAR(self->handle_comment);
3423 Py_CLEAR(self->handle_end);
3424 Py_CLEAR(self->handle_data);
3425 Py_CLEAR(self->handle_start);
3426 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427
Antoine Pitrouc1948842012-10-01 23:40:37 +02003428 Py_CLEAR(self->target);
3429 Py_CLEAR(self->entity);
3430 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431
Eli Bendersky52467b12012-06-01 07:13:08 +03003432 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433}
3434
Eli Bendersky52467b12012-06-01 07:13:08 +03003435static void
3436xmlparser_dealloc(XMLParserObject* self)
3437{
3438 PyObject_GC_UnTrack(self);
3439 xmlparser_gc_clear(self);
3440 Py_TYPE(self)->tp_free((PyObject *)self);
3441}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003442
3443LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003444expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003445{
3446 int ok;
3447
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003448 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003449 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3450
3451 if (PyErr_Occurred())
3452 return NULL;
3453
3454 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003455 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003456 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003457 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003458 EXPAT(GetErrorColumnNumber)(self->parser),
3459 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460 );
3461 return NULL;
3462 }
3463
3464 Py_RETURN_NONE;
3465}
3466
Serhiy Storchakacb985562015-05-04 15:32:48 +03003467/*[clinic input]
3468_elementtree.XMLParser.close
3469
3470[clinic start generated code]*/
3471
3472static PyObject *
3473_elementtree_XMLParser_close_impl(XMLParserObject *self)
3474/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475{
3476 /* end feeding data to parser */
3477
3478 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003480 if (!res)
3481 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003483 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003484 Py_DECREF(res);
3485 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003486 }
3487 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003488 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003489 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003490 }
3491 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003492 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003493 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494}
3495
Serhiy Storchakacb985562015-05-04 15:32:48 +03003496/*[clinic input]
3497_elementtree.XMLParser.feed
3498
3499 data: object
3500 /
3501
3502[clinic start generated code]*/
3503
3504static PyObject *
3505_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3506/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507{
3508 /* feed data to parser */
3509
Serhiy Storchakacb985562015-05-04 15:32:48 +03003510 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003511 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003512 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3513 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003514 return NULL;
3515 if (data_len > INT_MAX) {
3516 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3517 return NULL;
3518 }
3519 /* Explicitly set UTF-8 encoding. Return code ignored. */
3520 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003521 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003522 }
3523 else {
3524 Py_buffer view;
3525 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003526 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003527 return NULL;
3528 if (view.len > INT_MAX) {
3529 PyBuffer_Release(&view);
3530 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3531 return NULL;
3532 }
3533 res = expat_parse(self, view.buf, (int)view.len, 0);
3534 PyBuffer_Release(&view);
3535 return res;
3536 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003537}
3538
Serhiy Storchakacb985562015-05-04 15:32:48 +03003539/*[clinic input]
3540_elementtree.XMLParser._parse_whole
3541
3542 file: object
3543 /
3544
3545[clinic start generated code]*/
3546
3547static PyObject *
3548_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3549/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550{
Eli Benderskya3699232013-05-19 18:47:23 -07003551 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 PyObject* reader;
3553 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003554 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555 PyObject* res;
3556
Serhiy Storchakacb985562015-05-04 15:32:48 +03003557 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558 if (!reader)
3559 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003560
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003561 /* read from open file object */
3562 for (;;) {
3563
3564 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3565
3566 if (!buffer) {
3567 /* read failed (e.g. due to KeyboardInterrupt) */
3568 Py_DECREF(reader);
3569 return NULL;
3570 }
3571
Eli Benderskyf996e772012-03-16 05:53:30 +02003572 if (PyUnicode_CheckExact(buffer)) {
3573 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003574 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003575 Py_DECREF(buffer);
3576 break;
3577 }
3578 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003579 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003580 if (!temp) {
3581 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003582 Py_DECREF(reader);
3583 return NULL;
3584 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003585 buffer = temp;
3586 }
3587 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 Py_DECREF(buffer);
3589 break;
3590 }
3591
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003592 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3593 Py_DECREF(buffer);
3594 Py_DECREF(reader);
3595 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3596 return NULL;
3597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003599 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600 );
3601
3602 Py_DECREF(buffer);
3603
3604 if (!res) {
3605 Py_DECREF(reader);
3606 return NULL;
3607 }
3608 Py_DECREF(res);
3609
3610 }
3611
3612 Py_DECREF(reader);
3613
3614 res = expat_parse(self, "", 0, 1);
3615
3616 if (res && TreeBuilder_CheckExact(self->target)) {
3617 Py_DECREF(res);
3618 return treebuilder_done((TreeBuilderObject*) self->target);
3619 }
3620
3621 return res;
3622}
3623
Serhiy Storchakacb985562015-05-04 15:32:48 +03003624/*[clinic input]
3625_elementtree.XMLParser.doctype
3626
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003627 name: object
3628 pubid: object
3629 system: object
3630 /
3631
Serhiy Storchakacb985562015-05-04 15:32:48 +03003632[clinic start generated code]*/
3633
3634static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003635_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3636 PyObject *pubid, PyObject *system)
3637/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003638{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003639 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3640 "This method of XMLParser is deprecated. Define"
3641 " doctype() method on the TreeBuilder target.",
3642 1) < 0) {
3643 return NULL;
3644 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003645 Py_RETURN_NONE;
3646}
3647
Serhiy Storchakacb985562015-05-04 15:32:48 +03003648/*[clinic input]
3649_elementtree.XMLParser._setevents
3650
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003651 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003652 events_to_report: object = None
3653 /
3654
3655[clinic start generated code]*/
3656
3657static PyObject *
3658_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3659 PyObject *events_queue,
3660 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003661/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662{
3663 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003664 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003665 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003666 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667
3668 if (!TreeBuilder_CheckExact(self->target)) {
3669 PyErr_SetString(
3670 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003671 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003672 "targets"
3673 );
3674 return NULL;
3675 }
3676
3677 target = (TreeBuilderObject*) self->target;
3678
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003679 events_append = PyObject_GetAttrString(events_queue, "append");
3680 if (events_append == NULL)
3681 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003682 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683
3684 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003685 Py_CLEAR(target->start_event_obj);
3686 Py_CLEAR(target->end_event_obj);
3687 Py_CLEAR(target->start_ns_event_obj);
3688 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003689
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003690 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003691 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003692 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003693 Py_RETURN_NONE;
3694 }
3695
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003696 if (!(events_seq = PySequence_Fast(events_to_report,
3697 "events must be a sequence"))) {
3698 return NULL;
3699 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003701 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003702 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003703 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003704 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003705 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003706 } else if (PyBytes_Check(event_name_obj)) {
3707 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003708 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003709 if (event_name == NULL) {
3710 Py_DECREF(events_seq);
3711 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3712 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003713 }
3714
3715 Py_INCREF(event_name_obj);
3716 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003717 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003718 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003719 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003720 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003721 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722 EXPAT(SetNamespaceDeclHandler)(
3723 self->parser,
3724 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3725 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3726 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003727 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003728 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003729 EXPAT(SetNamespaceDeclHandler)(
3730 self->parser,
3731 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3732 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3733 );
3734 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003735 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003736 Py_DECREF(events_seq);
3737 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738 return NULL;
3739 }
3740 }
3741
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003742 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003743 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003744}
3745
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003746static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003747xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003748{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003749 if (PyUnicode_Check(nameobj)) {
3750 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003751 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003752 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003753 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003754 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003755 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003756 return PyUnicode_FromFormat(
3757 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003758 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003759 }
3760 else
3761 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762
Alexander Belopolskye239d232010-12-08 23:31:48 +00003763 Py_INCREF(res);
3764 return res;
3765 }
3766 generic:
3767 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003768}
3769
Serhiy Storchakacb985562015-05-04 15:32:48 +03003770#include "clinic/_elementtree.c.h"
3771
3772static PyMethodDef element_methods[] = {
3773
3774 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3775
3776 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3777 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3778
3779 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3780 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3781 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3782
3783 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3784 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3785 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3786 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3787
3788 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3789 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3790 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3791
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003792 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003793 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3794
3795 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3796 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3797
3798 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3799
3800 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3801 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3802 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3803 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3804 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3805
3806 {NULL, NULL}
3807};
3808
3809static PyMappingMethods element_as_mapping = {
3810 (lenfunc) element_length,
3811 (binaryfunc) element_subscr,
3812 (objobjargproc) element_ass_subscr,
3813};
3814
Serhiy Storchakadde08152015-11-25 15:28:13 +02003815static PyGetSetDef element_getsetlist[] = {
3816 {"tag",
3817 (getter)element_tag_getter,
3818 (setter)element_tag_setter,
3819 "A string identifying what kind of data this element represents"},
3820 {"text",
3821 (getter)element_text_getter,
3822 (setter)element_text_setter,
3823 "A string of text directly after the start tag, or None"},
3824 {"tail",
3825 (getter)element_tail_getter,
3826 (setter)element_tail_setter,
3827 "A string of text directly after the end tag, or None"},
3828 {"attrib",
3829 (getter)element_attrib_getter,
3830 (setter)element_attrib_setter,
3831 "A dictionary containing the element's attributes"},
3832 {NULL},
3833};
3834
Serhiy Storchakacb985562015-05-04 15:32:48 +03003835static PyTypeObject Element_Type = {
3836 PyVarObject_HEAD_INIT(NULL, 0)
3837 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3838 /* methods */
3839 (destructor)element_dealloc, /* tp_dealloc */
3840 0, /* tp_print */
3841 0, /* tp_getattr */
3842 0, /* tp_setattr */
3843 0, /* tp_reserved */
3844 (reprfunc)element_repr, /* tp_repr */
3845 0, /* tp_as_number */
3846 &element_as_sequence, /* tp_as_sequence */
3847 &element_as_mapping, /* tp_as_mapping */
3848 0, /* tp_hash */
3849 0, /* tp_call */
3850 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003851 PyObject_GenericGetAttr, /* tp_getattro */
3852 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003853 0, /* tp_as_buffer */
3854 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3855 /* tp_flags */
3856 0, /* tp_doc */
3857 (traverseproc)element_gc_traverse, /* tp_traverse */
3858 (inquiry)element_gc_clear, /* tp_clear */
3859 0, /* tp_richcompare */
3860 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3861 0, /* tp_iter */
3862 0, /* tp_iternext */
3863 element_methods, /* tp_methods */
3864 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003865 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003866 0, /* tp_base */
3867 0, /* tp_dict */
3868 0, /* tp_descr_get */
3869 0, /* tp_descr_set */
3870 0, /* tp_dictoffset */
3871 (initproc)element_init, /* tp_init */
3872 PyType_GenericAlloc, /* tp_alloc */
3873 element_new, /* tp_new */
3874 0, /* tp_free */
3875};
3876
3877static PyMethodDef treebuilder_methods[] = {
3878 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3879 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3880 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3881 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3882 {NULL, NULL}
3883};
3884
3885static PyTypeObject TreeBuilder_Type = {
3886 PyVarObject_HEAD_INIT(NULL, 0)
3887 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3888 /* methods */
3889 (destructor)treebuilder_dealloc, /* tp_dealloc */
3890 0, /* tp_print */
3891 0, /* tp_getattr */
3892 0, /* tp_setattr */
3893 0, /* tp_reserved */
3894 0, /* tp_repr */
3895 0, /* tp_as_number */
3896 0, /* tp_as_sequence */
3897 0, /* tp_as_mapping */
3898 0, /* tp_hash */
3899 0, /* tp_call */
3900 0, /* tp_str */
3901 0, /* tp_getattro */
3902 0, /* tp_setattro */
3903 0, /* tp_as_buffer */
3904 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3905 /* tp_flags */
3906 0, /* tp_doc */
3907 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3908 (inquiry)treebuilder_gc_clear, /* tp_clear */
3909 0, /* tp_richcompare */
3910 0, /* tp_weaklistoffset */
3911 0, /* tp_iter */
3912 0, /* tp_iternext */
3913 treebuilder_methods, /* tp_methods */
3914 0, /* tp_members */
3915 0, /* tp_getset */
3916 0, /* tp_base */
3917 0, /* tp_dict */
3918 0, /* tp_descr_get */
3919 0, /* tp_descr_set */
3920 0, /* tp_dictoffset */
3921 _elementtree_TreeBuilder___init__, /* tp_init */
3922 PyType_GenericAlloc, /* tp_alloc */
3923 treebuilder_new, /* tp_new */
3924 0, /* tp_free */
3925};
3926
3927static PyMethodDef xmlparser_methods[] = {
3928 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3929 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3930 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3931 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3932 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3933 {NULL, NULL}
3934};
3935
Neal Norwitz227b5332006-03-22 09:28:35 +00003936static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003937 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003938 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003939 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003940 (destructor)xmlparser_dealloc, /* tp_dealloc */
3941 0, /* tp_print */
3942 0, /* tp_getattr */
3943 0, /* tp_setattr */
3944 0, /* tp_reserved */
3945 0, /* tp_repr */
3946 0, /* tp_as_number */
3947 0, /* tp_as_sequence */
3948 0, /* tp_as_mapping */
3949 0, /* tp_hash */
3950 0, /* tp_call */
3951 0, /* tp_str */
3952 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3953 0, /* tp_setattro */
3954 0, /* tp_as_buffer */
3955 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3956 /* tp_flags */
3957 0, /* tp_doc */
3958 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3959 (inquiry)xmlparser_gc_clear, /* tp_clear */
3960 0, /* tp_richcompare */
3961 0, /* tp_weaklistoffset */
3962 0, /* tp_iter */
3963 0, /* tp_iternext */
3964 xmlparser_methods, /* tp_methods */
3965 0, /* tp_members */
3966 0, /* tp_getset */
3967 0, /* tp_base */
3968 0, /* tp_dict */
3969 0, /* tp_descr_get */
3970 0, /* tp_descr_set */
3971 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003972 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003973 PyType_GenericAlloc, /* tp_alloc */
3974 xmlparser_new, /* tp_new */
3975 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003976};
3977
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003978/* ==================================================================== */
3979/* python module interface */
3980
3981static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003982 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003983 {NULL, NULL}
3984};
3985
Martin v. Löwis1a214512008-06-11 05:26:20 +00003986
Eli Bendersky532d03e2013-08-10 08:00:39 -07003987static struct PyModuleDef elementtreemodule = {
3988 PyModuleDef_HEAD_INIT,
3989 "_elementtree",
3990 NULL,
3991 sizeof(elementtreestate),
3992 _functions,
3993 NULL,
3994 elementtree_traverse,
3995 elementtree_clear,
3996 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003997};
3998
Neal Norwitzf6657e62006-12-28 04:47:50 +00003999PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004000PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004001{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004002 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004003 elementtreestate *st;
4004
4005 m = PyState_FindModule(&elementtreemodule);
4006 if (m) {
4007 Py_INCREF(m);
4008 return m;
4009 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004010
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004011 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004012 if (PyType_Ready(&ElementIter_Type) < 0)
4013 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004014 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004015 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004016 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004017 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004018 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004019 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004020
Eli Bendersky532d03e2013-08-10 08:00:39 -07004021 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004022 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004023 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004024 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004025
Eli Bendersky828efde2012-04-05 05:40:58 +03004026 if (!(temp = PyImport_ImportModule("copy")))
4027 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004028 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004029 Py_XDECREF(temp);
4030
Victor Stinnerb136f112017-07-10 22:28:02 +02004031 if (st->deepcopy_obj == NULL) {
4032 return NULL;
4033 }
4034
4035 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004036 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004037 return NULL;
4038
Eli Bendersky20d41742012-06-01 09:48:37 +03004039 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004040 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4041 if (expat_capi) {
4042 /* check that it's usable */
4043 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004044 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004045 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4046 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004047 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004048 PyErr_SetString(PyExc_ImportError,
4049 "pyexpat version is incompatible");
4050 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004051 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004052 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004053 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004054 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004055
Eli Bendersky532d03e2013-08-10 08:00:39 -07004056 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004057 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004058 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004059 Py_INCREF(st->parseerror_obj);
4060 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004061
Eli Bendersky092af1f2012-03-04 07:14:03 +02004062 Py_INCREF((PyObject *)&Element_Type);
4063 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4064
Eli Bendersky58d548d2012-05-29 15:45:16 +03004065 Py_INCREF((PyObject *)&TreeBuilder_Type);
4066 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4067
Eli Bendersky52467b12012-06-01 07:13:08 +03004068 Py_INCREF((PyObject *)&XMLParser_Type);
4069 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004070
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004071 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004072}