blob: bddac851d9c708e1b0a5b2dec3237c88009b99d9 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
26/* For best performance, chose a value so that 80-90% of all nodes
27 have no more than the given number of children. Set this to zero
28 to minimize the size of the element structure itself (this only
29 helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  Change the condition below to 1 to make
   ALLOC/RELEASE maintain a running byte count and print it together with
   the given comment; with the default 0 both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-local function returning
   `type`.  Under MSVC the function is additionally marked __inline and
   __fastcall; everywhere else it is a plain static. */
#ifdef _MSC_VER
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored address carries a 'join' flag.  Every use of text or
   tail as an object pointer must therefore go through JOIN_OBJ.  See the
   comments in the ElementObject definition for more info. */

/* strip the flag bit and return the plain object pointer */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))

/* extract the flag bit (0 or 1) */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)

/* return p with its flag bit cleared and then or-ed with flag */
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300134 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 return result;
144}
145
Eli Bendersky48d358b2012-05-30 17:57:50 +0300146/* Is the given object an empty dictionary?
147*/
148static int
149is_empty_dict(PyObject *obj)
150{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200151 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300152}
153
154
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200156/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157
158typedef struct {
159
160 /* attributes (a dictionary object), or None if no attributes */
161 PyObject* attrib;
162
163 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200164 Py_ssize_t length; /* actual number of items */
165 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166
167 /* this either points to _children or to a malloced buffer */
168 PyObject* *children;
169
170 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100171
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000172} ElementObjectExtra;
173
174typedef struct {
175 PyObject_HEAD
176
177 /* element tag (a string). */
178 PyObject* tag;
179
180 /* text before first child. note that this is a tagged pointer;
181 use JOIN_OBJ to get the object pointer. the join flag is used
182 to distinguish lists created by the tree builder from lists
183 assigned to the attribute by application code; the former
184 should be joined before being returned to the user, the latter
185 should be left intact. */
186 PyObject* text;
187
188 /* text after this element, in parent. note that this is a tagged
189 pointer; use JOIN_OBJ to get the object pointer. */
190 PyObject* tail;
191
192 ElementObjectExtra* extra;
193
Eli Benderskyebf37a22012-04-03 22:02:37 +0300194 PyObject *weakreflist; /* For tp_weaklistoffset */
195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObject;
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198
Christian Heimes90aa7642007-12-19 02:45:37 +0000199#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200202/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203
204LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200205create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206{
207 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200208 if (!self->extra) {
209 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200211 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213 if (!attrib)
214 attrib = Py_None;
215
216 Py_INCREF(attrib);
217 self->extra->attrib = attrib;
218
219 self->extra->length = 0;
220 self->extra->allocated = STATIC_CHILDREN;
221 self->extra->children = self->extra->_children;
222
223 return 0;
224}
225
226LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200227dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000228{
Eli Bendersky08b85292012-04-04 15:55:07 +0300229 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200230 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300231
Eli Benderskyebf37a22012-04-03 22:02:37 +0300232 if (!self->extra)
233 return;
234
235 /* Avoid DECREFs calling into this code again (cycles, etc.)
236 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300237 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300238 self->extra = NULL;
239
240 Py_DECREF(myextra->attrib);
241
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 for (i = 0; i < myextra->length; i++)
243 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
Eli Benderskyebf37a22012-04-03 22:02:37 +0300245 if (myextra->children != myextra->_children)
246 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
Eli Benderskyebf37a22012-04-03 22:02:37 +0300248 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249}
250
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251/* Convenience internal function to create new Element objects with the given
252 * tag and attributes.
253*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
257 ElementObject* self;
258
Eli Bendersky0192ba32012-03-30 16:38:33 +0300259 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260 if (self == NULL)
261 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 self->extra = NULL;
263
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 Py_INCREF(tag);
265 self->tag = tag;
266
267 Py_INCREF(Py_None);
268 self->text = Py_None;
269
270 Py_INCREF(Py_None);
271 self->tail = Py_None;
272
Eli Benderskyebf37a22012-04-03 22:02:37 +0300273 self->weakreflist = NULL;
274
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200275 ALLOC(sizeof(ElementObject), "create element");
276 PyObject_GC_Track(self);
277
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200278 if (attrib != Py_None && !is_empty_dict(attrib)) {
279 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200280 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200281 return NULL;
282 }
283 }
284
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 return (PyObject*) self;
286}
287
Eli Bendersky092af1f2012-03-04 07:14:03 +0200288static PyObject *
289element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
290{
291 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
292 if (e != NULL) {
293 Py_INCREF(Py_None);
294 e->tag = Py_None;
295
296 Py_INCREF(Py_None);
297 e->text = Py_None;
298
299 Py_INCREF(Py_None);
300 e->tail = Py_None;
301
302 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300303 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200304 }
305 return (PyObject *)e;
306}
307
Eli Bendersky737b1732012-05-29 06:02:56 +0300308/* Helper function for extracting the attrib dictionary from a keywords dict.
309 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800310 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300311 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700312 *
313 * Return a dictionary with the content of kwds merged into the content of
314 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300315 */
316static PyObject*
317get_attrib_from_keywords(PyObject *kwds)
318{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700319 PyObject *attrib_str = PyUnicode_FromString("attrib");
320 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300321
322 if (attrib) {
323 /* If attrib was found in kwds, copy its value and remove it from
324 * kwds
325 */
326 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700327 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300328 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
329 Py_TYPE(attrib)->tp_name);
330 return NULL;
331 }
332 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700333 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 } else {
335 attrib = PyDict_New();
336 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700337
338 Py_DECREF(attrib_str);
339
340 /* attrib can be NULL if PyDict_New failed */
341 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200342 if (PyDict_Update(attrib, kwds) < 0)
343 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300344 return attrib;
345}
346
Serhiy Storchakacb985562015-05-04 15:32:48 +0300347/*[clinic input]
348module _elementtree
349class _elementtree.Element "ElementObject *" "&Element_Type"
350class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
351class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
352[clinic start generated code]*/
353/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
354
Eli Bendersky092af1f2012-03-04 07:14:03 +0200355static int
356element_init(PyObject *self, PyObject *args, PyObject *kwds)
357{
358 PyObject *tag;
359 PyObject *tmp;
360 PyObject *attrib = NULL;
361 ElementObject *self_elem;
362
363 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
364 return -1;
365
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 if (attrib) {
367 /* attrib passed as positional arg */
368 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200369 if (!attrib)
370 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300371 if (kwds) {
372 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200373 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 return -1;
375 }
376 }
377 } else if (kwds) {
378 /* have keywords args */
379 attrib = get_attrib_from_keywords(kwds);
380 if (!attrib)
381 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200382 }
383
384 self_elem = (ElementObject *)self;
385
Antoine Pitrouc1948842012-10-01 23:40:37 +0200386 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 return -1;
390 }
391 }
392
Eli Bendersky48d358b2012-05-30 17:57:50 +0300393 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395
396 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300398 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399
400 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_DECREF(JOIN_OBJ(tmp));
404
405 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_DECREF(JOIN_OBJ(tmp));
409
410 return 0;
411}
412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000413LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200414element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417 PyObject* *children;
418
419 /* make sure self->children can hold the given number of extra
420 elements. set an exception and return -1 if allocation failed */
421
Victor Stinner5f0af232013-07-11 23:01:36 +0200422 if (!self->extra) {
423 if (create_extra(self, NULL) < 0)
424 return -1;
425 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000426
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200427 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
429 if (size > self->extra->allocated) {
430 /* use Python 2.4's list growth strategy */
431 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100433 * which needs at least 4 bytes.
434 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 * be safe.
436 */
437 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200438 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
439 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000441 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100442 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 * false alarm always assume at least one child to be safe.
444 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 children = PyObject_Realloc(self->extra->children,
446 size * sizeof(PyObject*));
447 if (!children)
448 goto nomemory;
449 } else {
450 children = PyObject_Malloc(size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 /* copy existing children from static area to malloc buffer */
454 memcpy(children, self->extra->children,
455 self->extra->length * sizeof(PyObject*));
456 }
457 self->extra->children = children;
458 self->extra->allocated = size;
459 }
460
461 return 0;
462
463 nomemory:
464 PyErr_NoMemory();
465 return -1;
466}
467
468LOCAL(int)
469element_add_subelement(ElementObject* self, PyObject* element)
470{
471 /* add a child element to a parent */
472
473 if (element_resize(self, 1) < 0)
474 return -1;
475
476 Py_INCREF(element);
477 self->extra->children[self->extra->length] = element;
478
479 self->extra->length++;
480
481 return 0;
482}
483
484LOCAL(PyObject*)
485element_get_attrib(ElementObject* self)
486{
487 /* return borrowed reference to attrib dictionary */
488 /* note: this function assumes that the extra section exists */
489
490 PyObject* res = self->extra->attrib;
491
492 if (res == Py_None) {
493 /* create missing dictionary */
494 res = PyDict_New();
495 if (!res)
496 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200497 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000498 self->extra->attrib = res;
499 }
500
501 return res;
502}
503
504LOCAL(PyObject*)
505element_get_text(ElementObject* self)
506{
507 /* return borrowed reference to text attribute */
508
Serhiy Storchaka576def02017-03-30 09:47:31 +0300509 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000510
511 if (JOIN_GET(res)) {
512 res = JOIN_OBJ(res);
513 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300514 PyObject *tmp = list_join(res);
515 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000516 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300517 self->text = tmp;
518 Py_DECREF(res);
519 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
Serhiy Storchaka576def02017-03-30 09:47:31 +0300531 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300536 PyObject *tmp = list_join(res);
537 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300539 self->tail = tmp;
540 Py_DECREF(res);
541 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
613static int
614element_gc_clear(ElementObject *self)
615{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700617 _clear_joined_ptr(&self->text);
618 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619
620 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 return 0;
625}
626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627static void
628element_dealloc(ElementObject* self)
629{
INADA Naokia6296d32017-08-24 14:55:17 +0900630 /* bpo-31095: UnTrack is needed before calling any callbacks */
Eli Bendersky0192ba32012-03-30 16:38:33 +0300631 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200632 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300633
634 if (self->weakreflist != NULL)
635 PyObject_ClearWeakRefs((PyObject *) self);
636
Eli Bendersky0192ba32012-03-30 16:38:33 +0300637 /* element_gc_clear clears all references and deallocates extra
638 */
639 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000640
641 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200642 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200643 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644}
645
646/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647
Serhiy Storchakacb985562015-05-04 15:32:48 +0300648/*[clinic input]
649_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650
Serhiy Storchakacb985562015-05-04 15:32:48 +0300651 subelement: object(subclass_of='&Element_Type')
652 /
653
654[clinic start generated code]*/
655
656static PyObject *
657_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
658/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
659{
660 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000661 return NULL;
662
663 Py_RETURN_NONE;
664}
665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666/*[clinic input]
667_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669[clinic start generated code]*/
670
671static PyObject *
672_elementtree_Element_clear_impl(ElementObject *self)
673/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
674{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300675 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676
677 Py_INCREF(Py_None);
678 Py_DECREF(JOIN_OBJ(self->text));
679 self->text = Py_None;
680
681 Py_INCREF(Py_None);
682 Py_DECREF(JOIN_OBJ(self->tail));
683 self->tail = Py_None;
684
685 Py_RETURN_NONE;
686}
687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.__copy__
690
691[clinic start generated code]*/
692
693static PyObject *
694_elementtree_Element___copy___impl(ElementObject *self)
695/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200697 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698 ElementObject* element;
699
Eli Bendersky092af1f2012-03-04 07:14:03 +0200700 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800701 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 if (!element)
703 return NULL;
704
705 Py_DECREF(JOIN_OBJ(element->text));
706 element->text = self->text;
707 Py_INCREF(JOIN_OBJ(element->text));
708
709 Py_DECREF(JOIN_OBJ(element->tail));
710 element->tail = self->tail;
711 Py_INCREF(JOIN_OBJ(element->tail));
712
713 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 if (element_resize(element, self->extra->length) < 0) {
715 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718
719 for (i = 0; i < self->extra->length; i++) {
720 Py_INCREF(self->extra->children[i]);
721 element->extra->children[i] = self->extra->children[i];
722 }
723
724 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 }
726
727 return (PyObject*) element;
728}
729
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200730/* Helper for a deep copy. */
731LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
732
Serhiy Storchakacb985562015-05-04 15:32:48 +0300733/*[clinic input]
734_elementtree.Element.__deepcopy__
735
Oren Milmand0568182017-09-12 17:39:15 +0300736 memo: object(subclass_of="&PyDict_Type")
Serhiy Storchakacb985562015-05-04 15:32:48 +0300737 /
738
739[clinic start generated code]*/
740
741static PyObject *
Oren Milmand0568182017-09-12 17:39:15 +0300742_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
743/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200745 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 ElementObject* element;
747 PyObject* tag;
748 PyObject* attrib;
749 PyObject* text;
750 PyObject* tail;
751 PyObject* id;
752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 tag = deepcopy(self->tag, memo);
754 if (!tag)
755 return NULL;
756
757 if (self->extra) {
758 attrib = deepcopy(self->extra->attrib, memo);
759 if (!attrib) {
760 Py_DECREF(tag);
761 return NULL;
762 }
763 } else {
764 Py_INCREF(Py_None);
765 attrib = Py_None;
766 }
767
Eli Bendersky092af1f2012-03-04 07:14:03 +0200768 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769
770 Py_DECREF(tag);
771 Py_DECREF(attrib);
772
773 if (!element)
774 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100775
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000776 text = deepcopy(JOIN_OBJ(self->text), memo);
777 if (!text)
778 goto error;
779 Py_DECREF(element->text);
780 element->text = JOIN_SET(text, JOIN_GET(self->text));
781
782 tail = deepcopy(JOIN_OBJ(self->tail), memo);
783 if (!tail)
784 goto error;
785 Py_DECREF(element->tail);
786 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
787
788 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789 if (element_resize(element, self->extra->length) < 0)
790 goto error;
791
792 for (i = 0; i < self->extra->length; i++) {
793 PyObject* child = deepcopy(self->extra->children[i], memo);
794 if (!child) {
795 element->extra->length = i;
796 goto error;
797 }
798 element->extra->children[i] = child;
799 }
800
801 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 }
803
804 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700805 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000806 if (!id)
807 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808
809 i = PyDict_SetItem(memo, id, (PyObject*) element);
810
811 Py_DECREF(id);
812
813 if (i < 0)
814 goto error;
815
816 return (PyObject*) element;
817
818 error:
819 Py_DECREF(element);
820 return NULL;
821}
822
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200823LOCAL(PyObject *)
824deepcopy(PyObject *object, PyObject *memo)
825{
826 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200827 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200828 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200829
830 /* Fast paths */
831 if (object == Py_None || PyUnicode_CheckExact(object)) {
832 Py_INCREF(object);
833 return object;
834 }
835
836 if (Py_REFCNT(object) == 1) {
837 if (PyDict_CheckExact(object)) {
838 PyObject *key, *value;
839 Py_ssize_t pos = 0;
840 int simple = 1;
841 while (PyDict_Next(object, &pos, &key, &value)) {
842 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
843 simple = 0;
844 break;
845 }
846 }
847 if (simple)
848 return PyDict_Copy(object);
849 /* Fall through to general case */
850 }
851 else if (Element_CheckExact(object)) {
Oren Milmand0568182017-09-12 17:39:15 +0300852 return _elementtree_Element___deepcopy___impl(
853 (ElementObject *)object, memo);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200854 }
855 }
856
857 /* General case */
858 st = ET_STATE_GLOBAL;
859 if (!st->deepcopy_obj) {
860 PyErr_SetString(PyExc_RuntimeError,
861 "deepcopy helper not found");
862 return NULL;
863 }
864
Victor Stinner7fbac452016-08-20 01:34:44 +0200865 stack[0] = object;
866 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200867 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200868}
869
870
Serhiy Storchakacb985562015-05-04 15:32:48 +0300871/*[clinic input]
872_elementtree.Element.__sizeof__ -> Py_ssize_t
873
874[clinic start generated code]*/
875
876static Py_ssize_t
877_elementtree_Element___sizeof___impl(ElementObject *self)
878/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200880 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200881 if (self->extra) {
882 result += sizeof(ElementObjectExtra);
883 if (self->extra->children != self->extra->_children)
884 result += sizeof(PyObject*) * self->extra->allocated;
885 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300886 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200887}
888
Eli Bendersky698bdb22013-01-10 06:01:06 -0800889/* dict keys for getstate/setstate. */
890#define PICKLED_TAG "tag"
891#define PICKLED_CHILDREN "_children"
892#define PICKLED_ATTRIB "attrib"
893#define PICKLED_TAIL "tail"
894#define PICKLED_TEXT "text"
895
896/* __getstate__ returns a fabricated instance dict as in the pure-Python
897 * Element implementation, for interoperability/interchangeability. This
898 * makes the pure-Python implementation details an API, but (a) there aren't
899 * any unnecessary structures there; and (b) it buys compatibility with 3.2
900 * pickles. See issue #16076.
901 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300902/*[clinic input]
903_elementtree.Element.__getstate__
904
905[clinic start generated code]*/
906
Eli Bendersky698bdb22013-01-10 06:01:06 -0800907static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300908_elementtree_Element___getstate___impl(ElementObject *self)
909/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200911 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 PyObject *instancedict = NULL, *children;
913
914 /* Build a list of children. */
915 children = PyList_New(self->extra ? self->extra->length : 0);
916 if (!children)
917 return NULL;
918 for (i = 0; i < PyList_GET_SIZE(children); i++) {
919 PyObject *child = self->extra->children[i];
920 Py_INCREF(child);
921 PyList_SET_ITEM(children, i, child);
922 }
923
924 /* Construct the state object. */
925 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
926 if (noattrib)
927 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
928 PICKLED_TAG, self->tag,
929 PICKLED_CHILDREN, children,
930 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700931 PICKLED_TEXT, JOIN_OBJ(self->text),
932 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800933 else
934 instancedict = Py_BuildValue("{sOsOsOsOsO}",
935 PICKLED_TAG, self->tag,
936 PICKLED_CHILDREN, children,
937 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700938 PICKLED_TEXT, JOIN_OBJ(self->text),
939 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800940 if (instancedict) {
941 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800943 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944 else {
945 for (i = 0; i < PyList_GET_SIZE(children); i++)
946 Py_DECREF(PyList_GET_ITEM(children, i));
947 Py_DECREF(children);
948
949 return NULL;
950 }
951}
952
953static PyObject *
954element_setstate_from_attributes(ElementObject *self,
955 PyObject *tag,
956 PyObject *attrib,
957 PyObject *text,
958 PyObject *tail,
959 PyObject *children)
960{
961 Py_ssize_t i, nchildren;
962
963 if (!tag) {
964 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
965 return NULL;
966 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200968 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300969 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Eli Benderskydd3661e2013-09-13 06:24:25 -0700971 _clear_joined_ptr(&self->text);
972 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
973 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974
Eli Benderskydd3661e2013-09-13 06:24:25 -0700975 _clear_joined_ptr(&self->tail);
976 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
977 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978
979 /* Handle ATTRIB and CHILDREN. */
980 if (!children && !attrib)
981 Py_RETURN_NONE;
982
983 /* Compute 'nchildren'. */
984 if (children) {
985 if (!PyList_Check(children)) {
986 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
987 return NULL;
988 }
989 nchildren = PyList_Size(children);
990 }
991 else {
992 nchildren = 0;
993 }
994
995 /* Allocate 'extra'. */
996 if (element_resize(self, nchildren)) {
997 return NULL;
998 }
999 assert(self->extra && self->extra->allocated >= nchildren);
1000
1001 /* Copy children */
1002 for (i = 0; i < nchildren; i++) {
1003 self->extra->children[i] = PyList_GET_ITEM(children, i);
1004 Py_INCREF(self->extra->children[i]);
1005 }
1006
1007 self->extra->length = nchildren;
1008 self->extra->allocated = nchildren;
1009
1010 /* Stash attrib. */
1011 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001013 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001014 }
1015
1016 Py_RETURN_NONE;
1017}
1018
1019/* __setstate__ for Element instance from the Python implementation.
1020 * 'state' should be the instance dict.
1021 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001022
Eli Bendersky698bdb22013-01-10 06:01:06 -08001023static PyObject *
1024element_setstate_from_Python(ElementObject *self, PyObject *state)
1025{
1026 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1027 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1028 PyObject *args;
1029 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031
Eli Bendersky698bdb22013-01-10 06:01:06 -08001032 tag = attrib = text = tail = children = NULL;
1033 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001035 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001036
1037 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1038 &attrib, &text, &tail, &children))
1039 retval = element_setstate_from_attributes(self, tag, attrib, text,
1040 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001041 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001042 retval = NULL;
1043
1044 Py_DECREF(args);
1045 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001046}
1047
Serhiy Storchakacb985562015-05-04 15:32:48 +03001048/*[clinic input]
1049_elementtree.Element.__setstate__
1050
1051 state: object
1052 /
1053
1054[clinic start generated code]*/
1055
Eli Bendersky698bdb22013-01-10 06:01:06 -08001056static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001057_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1058/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001059{
1060 if (!PyDict_CheckExact(state)) {
1061 PyErr_Format(PyExc_TypeError,
1062 "Don't know how to unpickle \"%.200R\" as an Element",
1063 state);
1064 return NULL;
1065 }
1066 else
1067 return element_setstate_from_Python(self, state);
1068}
1069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001070LOCAL(int)
1071checkpath(PyObject* tag)
1072{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001073 Py_ssize_t i;
1074 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075
1076 /* check if a tag contains an xpath character */
1077
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001078#define PATHCHAR(ch) \
1079 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001080
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1083 void *data = PyUnicode_DATA(tag);
1084 unsigned int kind = PyUnicode_KIND(tag);
1085 for (i = 0; i < len; i++) {
1086 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1087 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001091 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 return 1;
1093 }
1094 return 0;
1095 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001096 if (PyBytes_Check(tag)) {
1097 char *p = PyBytes_AS_STRING(tag);
1098 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001099 if (p[i] == '{')
1100 check = 0;
1101 else if (p[i] == '}')
1102 check = 1;
1103 else if (check && PATHCHAR(p[i]))
1104 return 1;
1105 }
1106 return 0;
1107 }
1108
1109 return 1; /* unknown type; might be path expression */
1110}
1111
Serhiy Storchakacb985562015-05-04 15:32:48 +03001112/*[clinic input]
1113_elementtree.Element.extend
1114
1115 elements: object
1116 /
1117
1118[clinic start generated code]*/
1119
1120static PyObject *
1121_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1122/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123{
1124 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001125 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126
Serhiy Storchakacb985562015-05-04 15:32:48 +03001127 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 if (!seq) {
1129 PyErr_Format(
1130 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001131 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001132 );
1133 return NULL;
1134 }
1135
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001137 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001138 Py_INCREF(element);
1139 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001140 PyErr_Format(
1141 PyExc_TypeError,
1142 "expected an Element, not \"%.200s\"",
1143 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001144 Py_DECREF(seq);
1145 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001146 return NULL;
1147 }
1148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 if (element_add_subelement(self, element) < 0) {
1150 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001151 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001152 return NULL;
1153 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001154 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001155 }
1156
1157 Py_DECREF(seq);
1158
1159 Py_RETURN_NONE;
1160}
1161
Serhiy Storchakacb985562015-05-04 15:32:48 +03001162/*[clinic input]
1163_elementtree.Element.find
1164
1165 path: object
1166 namespaces: object = None
1167
1168[clinic start generated code]*/
1169
1170static PyObject *
1171_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1172 PyObject *namespaces)
1173/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001175 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001176 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001177
Serhiy Storchakacb985562015-05-04 15:32:48 +03001178 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001179 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001180 return _PyObject_CallMethodIdObjArgs(
1181 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001183 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184
1185 if (!self->extra)
1186 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001187
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001188 for (i = 0; i < self->extra->length; i++) {
1189 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 int rc;
1191 if (!Element_CheckExact(item))
1192 continue;
1193 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001194 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001197 Py_DECREF(item);
1198 if (rc < 0)
1199 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001200 }
1201
1202 Py_RETURN_NONE;
1203}
1204
Serhiy Storchakacb985562015-05-04 15:32:48 +03001205/*[clinic input]
1206_elementtree.Element.findtext
1207
1208 path: object
1209 default: object = None
1210 namespaces: object = None
1211
1212[clinic start generated code]*/
1213
1214static PyObject *
1215_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1216 PyObject *default_value,
1217 PyObject *namespaces)
1218/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001220 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001221 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001222 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001223
Serhiy Storchakacb985562015-05-04 15:32:48 +03001224 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001225 return _PyObject_CallMethodIdObjArgs(
1226 st->elementpath_obj, &PyId_findtext,
1227 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001228 );
1229
1230 if (!self->extra) {
1231 Py_INCREF(default_value);
1232 return default_value;
1233 }
1234
1235 for (i = 0; i < self->extra->length; i++) {
1236 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001237 int rc;
1238 if (!Element_CheckExact(item))
1239 continue;
1240 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001241 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001243 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001244 if (text == Py_None) {
1245 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001246 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001248 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 return text;
1251 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001252 Py_DECREF(item);
1253 if (rc < 0)
1254 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001255 }
1256
1257 Py_INCREF(default_value);
1258 return default_value;
1259}
1260
Serhiy Storchakacb985562015-05-04 15:32:48 +03001261/*[clinic input]
1262_elementtree.Element.findall
1263
1264 path: object
1265 namespaces: object = None
1266
1267[clinic start generated code]*/
1268
1269static PyObject *
1270_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1271 PyObject *namespaces)
1272/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001274 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001276 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001277 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001279 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001280 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001281 return _PyObject_CallMethodIdObjArgs(
1282 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001283 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001284 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001285
1286 out = PyList_New(0);
1287 if (!out)
1288 return NULL;
1289
1290 if (!self->extra)
1291 return out;
1292
1293 for (i = 0; i < self->extra->length; i++) {
1294 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001295 int rc;
1296 if (!Element_CheckExact(item))
1297 continue;
1298 Py_INCREF(item);
1299 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1300 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1301 Py_DECREF(item);
1302 Py_DECREF(out);
1303 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001305 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001306 }
1307
1308 return out;
1309}
1310
Serhiy Storchakacb985562015-05-04 15:32:48 +03001311/*[clinic input]
1312_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001313
Serhiy Storchakacb985562015-05-04 15:32:48 +03001314 path: object
1315 namespaces: object = None
1316
1317[clinic start generated code]*/
1318
1319static PyObject *
1320_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1321 PyObject *namespaces)
1322/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1323{
1324 PyObject* tag = path;
1325 _Py_IDENTIFIER(iterfind);
1326 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001327
Victor Stinnerf5616342016-12-09 15:26:00 +01001328 return _PyObject_CallMethodIdObjArgs(
1329 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001330}
1331
Serhiy Storchakacb985562015-05-04 15:32:48 +03001332/*[clinic input]
1333_elementtree.Element.get
1334
1335 key: object
1336 default: object = None
1337
1338[clinic start generated code]*/
1339
1340static PyObject *
1341_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1342 PyObject *default_value)
1343/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344{
1345 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001346
1347 if (!self->extra || self->extra->attrib == Py_None)
1348 value = default_value;
1349 else {
1350 value = PyDict_GetItem(self->extra->attrib, key);
1351 if (!value)
1352 value = default_value;
1353 }
1354
1355 Py_INCREF(value);
1356 return value;
1357}
1358
Serhiy Storchakacb985562015-05-04 15:32:48 +03001359/*[clinic input]
1360_elementtree.Element.getchildren
1361
1362[clinic start generated code]*/
1363
1364static PyObject *
1365_elementtree_Element_getchildren_impl(ElementObject *self)
1366/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001368 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001369 PyObject* list;
1370
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001371 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1372 "This method will be removed in future versions. "
1373 "Use 'list(elem)' or iteration over elem instead.",
1374 1) < 0) {
1375 return NULL;
1376 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001377
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001378 if (!self->extra)
1379 return PyList_New(0);
1380
1381 list = PyList_New(self->extra->length);
1382 if (!list)
1383 return NULL;
1384
1385 for (i = 0; i < self->extra->length; i++) {
1386 PyObject* item = self->extra->children[i];
1387 Py_INCREF(item);
1388 PyList_SET_ITEM(list, i, item);
1389 }
1390
1391 return list;
1392}
1393
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001394
Eli Bendersky64d11e62012-06-15 07:42:50 +03001395static PyObject *
1396create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1397
1398
Serhiy Storchakacb985562015-05-04 15:32:48 +03001399/*[clinic input]
1400_elementtree.Element.iter
1401
1402 tag: object = None
1403
1404[clinic start generated code]*/
1405
Eli Bendersky64d11e62012-06-15 07:42:50 +03001406static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001407_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1408/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001409{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001410 if (PyUnicode_Check(tag)) {
1411 if (PyUnicode_READY(tag) < 0)
1412 return NULL;
1413 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1414 tag = Py_None;
1415 }
1416 else if (PyBytes_Check(tag)) {
1417 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1418 tag = Py_None;
1419 }
1420
Eli Bendersky64d11e62012-06-15 07:42:50 +03001421 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001422}
1423
1424
Serhiy Storchakacb985562015-05-04 15:32:48 +03001425/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001426_elementtree.Element.getiterator
1427
1428 tag: object = None
1429
1430[clinic start generated code]*/
1431
1432static PyObject *
1433_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1434/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1435{
1436 /* Change for a DeprecationWarning in 1.4 */
1437 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1438 "This method will be removed in future versions. "
1439 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1440 1) < 0) {
1441 return NULL;
1442 }
1443 return _elementtree_Element_iter_impl(self, tag);
1444}
1445
1446
1447/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001448_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001449
Serhiy Storchakacb985562015-05-04 15:32:48 +03001450[clinic start generated code]*/
1451
1452static PyObject *
1453_elementtree_Element_itertext_impl(ElementObject *self)
1454/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1455{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001456 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001457}
1458
Eli Bendersky64d11e62012-06-15 07:42:50 +03001459
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001461element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001463 ElementObject* self = (ElementObject*) self_;
1464
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001465 if (!self->extra || index < 0 || index >= self->extra->length) {
1466 PyErr_SetString(
1467 PyExc_IndexError,
1468 "child index out of range"
1469 );
1470 return NULL;
1471 }
1472
1473 Py_INCREF(self->extra->children[index]);
1474 return self->extra->children[index];
1475}
1476
Serhiy Storchakacb985562015-05-04 15:32:48 +03001477/*[clinic input]
1478_elementtree.Element.insert
1479
1480 index: Py_ssize_t
1481 subelement: object(subclass_of='&Element_Type')
1482 /
1483
1484[clinic start generated code]*/
1485
1486static PyObject *
1487_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1488 PyObject *subelement)
1489/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001490{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001491 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492
Victor Stinner5f0af232013-07-11 23:01:36 +02001493 if (!self->extra) {
1494 if (create_extra(self, NULL) < 0)
1495 return NULL;
1496 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001498 if (index < 0) {
1499 index += self->extra->length;
1500 if (index < 0)
1501 index = 0;
1502 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001503 if (index > self->extra->length)
1504 index = self->extra->length;
1505
1506 if (element_resize(self, 1) < 0)
1507 return NULL;
1508
1509 for (i = self->extra->length; i > index; i--)
1510 self->extra->children[i] = self->extra->children[i-1];
1511
Serhiy Storchakacb985562015-05-04 15:32:48 +03001512 Py_INCREF(subelement);
1513 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514
1515 self->extra->length++;
1516
1517 Py_RETURN_NONE;
1518}
1519
Serhiy Storchakacb985562015-05-04 15:32:48 +03001520/*[clinic input]
1521_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522
Serhiy Storchakacb985562015-05-04 15:32:48 +03001523[clinic start generated code]*/
1524
1525static PyObject *
1526_elementtree_Element_items_impl(ElementObject *self)
1527/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1528{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529 if (!self->extra || self->extra->attrib == Py_None)
1530 return PyList_New(0);
1531
1532 return PyDict_Items(self->extra->attrib);
1533}
1534
Serhiy Storchakacb985562015-05-04 15:32:48 +03001535/*[clinic input]
1536_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001537
Serhiy Storchakacb985562015-05-04 15:32:48 +03001538[clinic start generated code]*/
1539
1540static PyObject *
1541_elementtree_Element_keys_impl(ElementObject *self)
1542/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1543{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544 if (!self->extra || self->extra->attrib == Py_None)
1545 return PyList_New(0);
1546
1547 return PyDict_Keys(self->extra->attrib);
1548}
1549
Martin v. Löwis18e16552006-02-15 17:27:45 +00001550static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001551element_length(ElementObject* self)
1552{
1553 if (!self->extra)
1554 return 0;
1555
1556 return self->extra->length;
1557}
1558
Serhiy Storchakacb985562015-05-04 15:32:48 +03001559/*[clinic input]
1560_elementtree.Element.makeelement
1561
1562 tag: object
1563 attrib: object
1564 /
1565
1566[clinic start generated code]*/
1567
1568static PyObject *
1569_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1570 PyObject *attrib)
1571/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572{
1573 PyObject* elem;
1574
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575 attrib = PyDict_Copy(attrib);
1576 if (!attrib)
1577 return NULL;
1578
Eli Bendersky092af1f2012-03-04 07:14:03 +02001579 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580
1581 Py_DECREF(attrib);
1582
1583 return elem;
1584}
1585
Serhiy Storchakacb985562015-05-04 15:32:48 +03001586/*[clinic input]
1587_elementtree.Element.remove
1588
1589 subelement: object(subclass_of='&Element_Type')
1590 /
1591
1592[clinic start generated code]*/
1593
1594static PyObject *
1595_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1596/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001598 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001599 int rc;
1600 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602 if (!self->extra) {
1603 /* element has no children, so raise exception */
1604 PyErr_SetString(
1605 PyExc_ValueError,
1606 "list.remove(x): x not in list"
1607 );
1608 return NULL;
1609 }
1610
1611 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001612 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001614 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001615 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001616 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001617 if (rc < 0)
1618 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619 }
1620
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001621 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001622 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001623 PyErr_SetString(
1624 PyExc_ValueError,
1625 "list.remove(x): x not in list"
1626 );
1627 return NULL;
1628 }
1629
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001630 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631
1632 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001633 for (; i < self->extra->length; i++)
1634 self->extra->children[i] = self->extra->children[i+1];
1635
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001636 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637 Py_RETURN_NONE;
1638}
1639
1640static PyObject*
1641element_repr(ElementObject* self)
1642{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001643 int status;
1644
1645 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001646 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001647
1648 status = Py_ReprEnter((PyObject *)self);
1649 if (status == 0) {
1650 PyObject *res;
1651 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1652 Py_ReprLeave((PyObject *)self);
1653 return res;
1654 }
1655 if (status > 0)
1656 PyErr_Format(PyExc_RuntimeError,
1657 "reentrant call inside %s.__repr__",
1658 Py_TYPE(self)->tp_name);
1659 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001660}
1661
Serhiy Storchakacb985562015-05-04 15:32:48 +03001662/*[clinic input]
1663_elementtree.Element.set
1664
1665 key: object
1666 value: object
1667 /
1668
1669[clinic start generated code]*/
1670
1671static PyObject *
1672_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1673 PyObject *value)
1674/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001675{
1676 PyObject* attrib;
1677
Victor Stinner5f0af232013-07-11 23:01:36 +02001678 if (!self->extra) {
1679 if (create_extra(self, NULL) < 0)
1680 return NULL;
1681 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001682
1683 attrib = element_get_attrib(self);
1684 if (!attrib)
1685 return NULL;
1686
1687 if (PyDict_SetItem(attrib, key, value) < 0)
1688 return NULL;
1689
1690 Py_RETURN_NONE;
1691}
1692
1693static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001694element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001695{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001696 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001697 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001698 PyObject* old;
1699
1700 if (!self->extra || index < 0 || index >= self->extra->length) {
1701 PyErr_SetString(
1702 PyExc_IndexError,
1703 "child assignment index out of range");
1704 return -1;
1705 }
1706
1707 old = self->extra->children[index];
1708
1709 if (item) {
1710 Py_INCREF(item);
1711 self->extra->children[index] = item;
1712 } else {
1713 self->extra->length--;
1714 for (i = index; i < self->extra->length; i++)
1715 self->extra->children[i] = self->extra->children[i+1];
1716 }
1717
1718 Py_DECREF(old);
1719
1720 return 0;
1721}
1722
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001723static PyObject*
1724element_subscr(PyObject* self_, PyObject* item)
1725{
1726 ElementObject* self = (ElementObject*) self_;
1727
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001728 if (PyIndex_Check(item)) {
1729 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001730
1731 if (i == -1 && PyErr_Occurred()) {
1732 return NULL;
1733 }
1734 if (i < 0 && self->extra)
1735 i += self->extra->length;
1736 return element_getitem(self_, i);
1737 }
1738 else if (PySlice_Check(item)) {
1739 Py_ssize_t start, stop, step, slicelen, cur, i;
1740 PyObject* list;
1741
1742 if (!self->extra)
1743 return PyList_New(0);
1744
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001745 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746 return NULL;
1747 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001748 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1749 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001750
1751 if (slicelen <= 0)
1752 return PyList_New(0);
1753 else {
1754 list = PyList_New(slicelen);
1755 if (!list)
1756 return NULL;
1757
1758 for (cur = start, i = 0; i < slicelen;
1759 cur += step, i++) {
1760 PyObject* item = self->extra->children[cur];
1761 Py_INCREF(item);
1762 PyList_SET_ITEM(list, i, item);
1763 }
1764
1765 return list;
1766 }
1767 }
1768 else {
1769 PyErr_SetString(PyExc_TypeError,
1770 "element indices must be integers");
1771 return NULL;
1772 }
1773}
1774
1775static int
1776element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1777{
1778 ElementObject* self = (ElementObject*) self_;
1779
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001780 if (PyIndex_Check(item)) {
1781 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782
1783 if (i == -1 && PyErr_Occurred()) {
1784 return -1;
1785 }
1786 if (i < 0 && self->extra)
1787 i += self->extra->length;
1788 return element_setitem(self_, i, value);
1789 }
1790 else if (PySlice_Check(item)) {
1791 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1792
1793 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001794 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795
Victor Stinner5f0af232013-07-11 23:01:36 +02001796 if (!self->extra) {
1797 if (create_extra(self, NULL) < 0)
1798 return -1;
1799 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001800
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001801 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001802 return -1;
1803 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001804 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1805 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001806
Eli Bendersky865756a2012-03-09 13:38:15 +02001807 if (value == NULL) {
1808 /* Delete slice */
1809 size_t cur;
1810 Py_ssize_t i;
1811
1812 if (slicelen <= 0)
1813 return 0;
1814
1815 /* Since we're deleting, the direction of the range doesn't matter,
1816 * so for simplicity make it always ascending.
1817 */
1818 if (step < 0) {
1819 stop = start + 1;
1820 start = stop + step * (slicelen - 1) - 1;
1821 step = -step;
1822 }
1823
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001824 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001825
1826 /* recycle is a list that will contain all the children
1827 * scheduled for removal.
1828 */
1829 if (!(recycle = PyList_New(slicelen))) {
1830 PyErr_NoMemory();
1831 return -1;
1832 }
1833
1834 /* This loop walks over all the children that have to be deleted,
1835 * with cur pointing at them. num_moved is the amount of children
1836 * until the next deleted child that have to be "shifted down" to
1837 * occupy the deleted's places.
1838 * Note that in the ith iteration, shifting is done i+i places down
1839 * because i children were already removed.
1840 */
1841 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1842 /* Compute how many children have to be moved, clipping at the
1843 * list end.
1844 */
1845 Py_ssize_t num_moved = step - 1;
1846 if (cur + step >= (size_t)self->extra->length) {
1847 num_moved = self->extra->length - cur - 1;
1848 }
1849
1850 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1851
1852 memmove(
1853 self->extra->children + cur - i,
1854 self->extra->children + cur + 1,
1855 num_moved * sizeof(PyObject *));
1856 }
1857
1858 /* Leftover "tail" after the last removed child */
1859 cur = start + (size_t)slicelen * step;
1860 if (cur < (size_t)self->extra->length) {
1861 memmove(
1862 self->extra->children + cur - slicelen,
1863 self->extra->children + cur,
1864 (self->extra->length - cur) * sizeof(PyObject *));
1865 }
1866
1867 self->extra->length -= slicelen;
1868
1869 /* Discard the recycle list with all the deleted sub-elements */
1870 Py_XDECREF(recycle);
1871 return 0;
1872 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001873
1874 /* A new slice is actually being assigned */
1875 seq = PySequence_Fast(value, "");
1876 if (!seq) {
1877 PyErr_Format(
1878 PyExc_TypeError,
1879 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1880 );
1881 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001882 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001883 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001884
1885 if (step != 1 && newlen != slicelen)
1886 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001887 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001888 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001889 "attempt to assign sequence of size %zd "
1890 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001891 newlen, slicelen
1892 );
1893 return -1;
1894 }
1895
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896 /* Resize before creating the recycle bin, to prevent refleaks. */
1897 if (newlen > slicelen) {
1898 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001899 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001900 return -1;
1901 }
1902 }
1903
1904 if (slicelen > 0) {
1905 /* to avoid recursive calls to this method (via decref), move
1906 old items to the recycle bin here, and get rid of them when
1907 we're done modifying the element */
1908 recycle = PyList_New(slicelen);
1909 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001910 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001911 return -1;
1912 }
1913 for (cur = start, i = 0; i < slicelen;
1914 cur += step, i++)
1915 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1916 }
1917
1918 if (newlen < slicelen) {
1919 /* delete slice */
1920 for (i = stop; i < self->extra->length; i++)
1921 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1922 } else if (newlen > slicelen) {
1923 /* insert slice */
1924 for (i = self->extra->length-1; i >= stop; i--)
1925 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1926 }
1927
1928 /* replace the slice */
1929 for (cur = start, i = 0; i < newlen;
1930 cur += step, i++) {
1931 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1932 Py_INCREF(element);
1933 self->extra->children[cur] = element;
1934 }
1935
1936 self->extra->length += newlen - slicelen;
1937
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001938 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001939
1940 /* discard the recycle bin, and everything in it */
1941 Py_XDECREF(recycle);
1942
1943 return 0;
1944 }
1945 else {
1946 PyErr_SetString(PyExc_TypeError,
1947 "element indices must be integers");
1948 return -1;
1949 }
1950}
1951
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001952static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001953element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001954{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001955 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001956 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001957 return res;
1958}
1959
Serhiy Storchakadde08152015-11-25 15:28:13 +02001960static PyObject*
1961element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001963 PyObject *res = element_get_text(self);
1964 Py_XINCREF(res);
1965 return res;
1966}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001967
Serhiy Storchakadde08152015-11-25 15:28:13 +02001968static PyObject*
1969element_tail_getter(ElementObject *self, void *closure)
1970{
1971 PyObject *res = element_get_tail(self);
1972 Py_XINCREF(res);
1973 return res;
1974}
1975
1976static PyObject*
1977element_attrib_getter(ElementObject *self, void *closure)
1978{
1979 PyObject *res;
1980 if (!self->extra) {
1981 if (create_extra(self, NULL) < 0)
1982 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001983 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001984 res = element_get_attrib(self);
1985 Py_XINCREF(res);
1986 return res;
1987}
Victor Stinner4d463432013-07-11 23:05:03 +02001988
/* Macro for setter validation.
 *
 * A setter receives V == NULL when the attribute is being deleted; that is
 * rejected with AttributeError and the *enclosing function* returns -1.
 * Use it as a statement, followed by a semicolon.  The do/while(0) wrapper
 * makes the expansion a single statement, so it stays correct inside an
 * unbraced if/else.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1997
Serhiy Storchakadde08152015-11-25 15:28:13 +02001998static int
1999element_tag_setter(ElementObject *self, PyObject *value, void *closure)
2000{
2001 _VALIDATE_ATTR_VALUE(value);
2002 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002003 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002004 return 0;
2005}
2006
2007static int
2008element_text_setter(ElementObject *self, PyObject *value, void *closure)
2009{
2010 _VALIDATE_ATTR_VALUE(value);
2011 Py_INCREF(value);
2012 Py_DECREF(JOIN_OBJ(self->text));
2013 self->text = value;
2014 return 0;
2015}
2016
2017static int
2018element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2019{
2020 _VALIDATE_ATTR_VALUE(value);
2021 Py_INCREF(value);
2022 Py_DECREF(JOIN_OBJ(self->tail));
2023 self->tail = value;
2024 return 0;
2025}
2026
2027static int
2028element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2029{
2030 _VALIDATE_ATTR_VALUE(value);
2031 if (!self->extra) {
2032 if (create_extra(self, NULL) < 0)
2033 return -1;
2034 }
2035 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002036 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002037 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002038}
2039
2040static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002041 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002042 0, /* sq_concat */
2043 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002044 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002045 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002046 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002047 0,
2048};
2049
Eli Bendersky64d11e62012-06-15 07:42:50 +03002050/******************************* Element iterator ****************************/
2051
2052/* ElementIterObject represents the iteration state over an XML element in
2053 * pre-order traversal. To keep track of which sub-element should be returned
2054 * next, a stack of parents is maintained. This is a standard stack-based
2055 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002056 * The stack is managed using a continuous array.
2057 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002058 * the current one is exhausted, and the next child to examine in that parent.
2059 */
2060typedef struct ParentLocator_t {
2061 ElementObject *parent;
2062 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002063} ParentLocator;
2064
2065typedef struct {
2066 PyObject_HEAD
2067 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002068 Py_ssize_t parent_stack_used;
2069 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002070 ElementObject *root_element;
2071 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002072 int gettext;
2073} ElementIterObject;
2074
2075
2076static void
2077elementiter_dealloc(ElementIterObject *it)
2078{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002079 Py_ssize_t i = it->parent_stack_used;
2080 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002081 /* bpo-31095: UnTrack is needed before calling any callbacks */
2082 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002083 while (i--)
2084 Py_XDECREF(it->parent_stack[i].parent);
2085 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002086
2087 Py_XDECREF(it->sought_tag);
2088 Py_XDECREF(it->root_element);
2089
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 PyObject_GC_Del(it);
2091}
2092
2093static int
2094elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2095{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002096 Py_ssize_t i = it->parent_stack_used;
2097 while (i--)
2098 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099
2100 Py_VISIT(it->root_element);
2101 Py_VISIT(it->sought_tag);
2102 return 0;
2103}
2104
2105/* Helper function for elementiter_next. Add a new parent to the parent stack.
2106 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002107static int
2108parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002109{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002110 ParentLocator *item;
2111
2112 if (it->parent_stack_used >= it->parent_stack_size) {
2113 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2114 ParentLocator *parent_stack = it->parent_stack;
2115 PyMem_Resize(parent_stack, ParentLocator, new_size);
2116 if (parent_stack == NULL)
2117 return -1;
2118 it->parent_stack = parent_stack;
2119 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002120 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002121 item = it->parent_stack + it->parent_stack_used++;
2122 Py_INCREF(parent);
2123 item->parent = parent;
2124 item->child_index = 0;
2125 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002126}
2127
2128static PyObject *
2129elementiter_next(ElementIterObject *it)
2130{
2131 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002132 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002133 * A short note on gettext: this function serves both the iter() and
2134 * itertext() methods to avoid code duplication. However, there are a few
2135 * small differences in the way these iterations work. Namely:
2136 * - itertext() only yields text from nodes that have it, and continues
2137 * iterating when a node doesn't have text (so it doesn't return any
2138 * node like iter())
2139 * - itertext() also has to handle tail, after finishing with all the
2140 * children of a node.
2141 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002142 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002143 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002144 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002145
2146 while (1) {
2147 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002148 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 * iterator is exhausted.
2150 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151 if (!it->parent_stack_used) {
2152 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002153 PyErr_SetNone(PyExc_StopIteration);
2154 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002155 }
2156
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002157 elem = it->root_element; /* steals a reference */
2158 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002159 }
2160 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002161 /* See if there are children left to traverse in the current parent. If
2162 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002163 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002164 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2165 Py_ssize_t child_index = item->child_index;
2166 ElementObjectExtra *extra;
2167 elem = item->parent;
2168 extra = elem->extra;
2169 if (!extra || child_index >= extra->length) {
2170 it->parent_stack_used--;
2171 /* Note that extra condition on it->parent_stack_used here;
2172 * this is because itertext() is supposed to only return *inner*
2173 * text, not text following the element it began iteration with.
2174 */
2175 if (it->gettext && it->parent_stack_used) {
2176 text = element_get_tail(elem);
2177 goto gettext;
2178 }
2179 Py_DECREF(elem);
2180 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002181 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002182
Serhiy Storchaka576def02017-03-30 09:47:31 +03002183 if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) {
2184 PyErr_Format(PyExc_AttributeError,
2185 "'%.100s' object has no attribute 'iter'",
2186 Py_TYPE(extra->children[child_index])->tp_name);
2187 return NULL;
2188 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002189 elem = (ElementObject *)extra->children[child_index];
2190 item->child_index++;
2191 Py_INCREF(elem);
2192 }
2193
2194 if (parent_stack_push_new(it, elem) < 0) {
2195 Py_DECREF(elem);
2196 PyErr_NoMemory();
2197 return NULL;
2198 }
2199 if (it->gettext) {
2200 text = element_get_text(elem);
2201 goto gettext;
2202 }
2203
2204 if (it->sought_tag == Py_None)
2205 return (PyObject *)elem;
2206
2207 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2208 if (rc > 0)
2209 return (PyObject *)elem;
2210
2211 Py_DECREF(elem);
2212 if (rc < 0)
2213 return NULL;
2214 continue;
2215
2216gettext:
2217 if (!text) {
2218 Py_DECREF(elem);
2219 return NULL;
2220 }
2221 if (text == Py_None) {
2222 Py_DECREF(elem);
2223 }
2224 else {
2225 Py_INCREF(text);
2226 Py_DECREF(elem);
2227 rc = PyObject_IsTrue(text);
2228 if (rc > 0)
2229 return text;
2230 Py_DECREF(text);
2231 if (rc < 0)
2232 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002233 }
2234 }
2235
2236 return NULL;
2237}
2238
2239
2240static PyTypeObject ElementIter_Type = {
2241 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002242 /* Using the module's name since the pure-Python implementation does not
2243 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002244 "_elementtree._element_iterator", /* tp_name */
2245 sizeof(ElementIterObject), /* tp_basicsize */
2246 0, /* tp_itemsize */
2247 /* methods */
2248 (destructor)elementiter_dealloc, /* tp_dealloc */
2249 0, /* tp_print */
2250 0, /* tp_getattr */
2251 0, /* tp_setattr */
2252 0, /* tp_reserved */
2253 0, /* tp_repr */
2254 0, /* tp_as_number */
2255 0, /* tp_as_sequence */
2256 0, /* tp_as_mapping */
2257 0, /* tp_hash */
2258 0, /* tp_call */
2259 0, /* tp_str */
2260 0, /* tp_getattro */
2261 0, /* tp_setattro */
2262 0, /* tp_as_buffer */
2263 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2264 0, /* tp_doc */
2265 (traverseproc)elementiter_traverse, /* tp_traverse */
2266 0, /* tp_clear */
2267 0, /* tp_richcompare */
2268 0, /* tp_weaklistoffset */
2269 PyObject_SelfIter, /* tp_iter */
2270 (iternextfunc)elementiter_next, /* tp_iternext */
2271 0, /* tp_methods */
2272 0, /* tp_members */
2273 0, /* tp_getset */
2274 0, /* tp_base */
2275 0, /* tp_dict */
2276 0, /* tp_descr_get */
2277 0, /* tp_descr_set */
2278 0, /* tp_dictoffset */
2279 0, /* tp_init */
2280 0, /* tp_alloc */
2281 0, /* tp_new */
2282};
2283
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002284#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002285
2286static PyObject *
2287create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2288{
2289 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002290
2291 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2292 if (!it)
2293 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294
Victor Stinner4d463432013-07-11 23:05:03 +02002295 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002296 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002297 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002298 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002299 it->root_element = self;
2300
Eli Bendersky64d11e62012-06-15 07:42:50 +03002301 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002302
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002303 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002304 if (it->parent_stack == NULL) {
2305 Py_DECREF(it);
2306 PyErr_NoMemory();
2307 return NULL;
2308 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002309 it->parent_stack_used = 0;
2310 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002311
Eli Bendersky64d11e62012-06-15 07:42:50 +03002312 return (PyObject *)it;
2313}
2314
2315
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002316/* ==================================================================== */
2317/* the tree builder type */
2318
2319typedef struct {
2320 PyObject_HEAD
2321
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323
Antoine Pitrouee329312012-10-04 19:53:29 +02002324 PyObject *this; /* current node */
2325 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002326
Eli Bendersky58d548d2012-05-29 15:45:16 +03002327 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328
Eli Bendersky58d548d2012-05-29 15:45:16 +03002329 PyObject *stack; /* element stack */
2330 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002331
Eli Bendersky48d358b2012-05-30 17:57:50 +03002332 PyObject *element_factory;
2333
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002335 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002336 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2337 PyObject *end_event_obj;
2338 PyObject *start_ns_event_obj;
2339 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002340} TreeBuilderObject;
2341
Christian Heimes90aa7642007-12-19 02:45:37 +00002342#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002343
2344/* -------------------------------------------------------------------- */
2345/* constructor and destructor */
2346
Eli Bendersky58d548d2012-05-29 15:45:16 +03002347static PyObject *
2348treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002349{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002350 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2351 if (t != NULL) {
2352 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002353
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002355 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002356 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002357 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358
Eli Bendersky58d548d2012-05-29 15:45:16 +03002359 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002360 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002361 t->stack = PyList_New(20);
2362 if (!t->stack) {
2363 Py_DECREF(t->this);
2364 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002365 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002366 return NULL;
2367 }
2368 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002370 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002371 t->start_event_obj = t->end_event_obj = NULL;
2372 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2373 }
2374 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002375}
2376
Serhiy Storchakacb985562015-05-04 15:32:48 +03002377/*[clinic input]
2378_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002379
Serhiy Storchakacb985562015-05-04 15:32:48 +03002380 element_factory: object = NULL
2381
2382[clinic start generated code]*/
2383
2384static int
2385_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2386 PyObject *element_factory)
2387/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2388{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002389 if (element_factory) {
2390 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002391 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002392 }
2393
Eli Bendersky58d548d2012-05-29 15:45:16 +03002394 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395}
2396
Eli Bendersky48d358b2012-05-30 17:57:50 +03002397static int
2398treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2399{
2400 Py_VISIT(self->root);
2401 Py_VISIT(self->this);
2402 Py_VISIT(self->last);
2403 Py_VISIT(self->data);
2404 Py_VISIT(self->stack);
2405 Py_VISIT(self->element_factory);
2406 return 0;
2407}
2408
2409static int
2410treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002411{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002412 Py_CLEAR(self->end_ns_event_obj);
2413 Py_CLEAR(self->start_ns_event_obj);
2414 Py_CLEAR(self->end_event_obj);
2415 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002416 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002417 Py_CLEAR(self->stack);
2418 Py_CLEAR(self->data);
2419 Py_CLEAR(self->last);
2420 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002421 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002422 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002423 return 0;
2424}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425
Eli Bendersky48d358b2012-05-30 17:57:50 +03002426static void
2427treebuilder_dealloc(TreeBuilderObject *self)
2428{
2429 PyObject_GC_UnTrack(self);
2430 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002431 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432}
2433
2434/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002435/* helpers for handling of arbitrary element-like objects */
2436
2437static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002438treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002439 PyObject **dest, _Py_Identifier *name)
2440{
2441 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002442 PyObject *tmp = JOIN_OBJ(*dest);
2443 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2444 *data = NULL;
2445 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002446 return 0;
2447 }
2448 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002449 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002450 int r;
2451 if (joined == NULL)
2452 return -1;
2453 r = _PyObject_SetAttrId(element, name, joined);
2454 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002455 if (r < 0)
2456 return -1;
2457 Py_CLEAR(*data);
2458 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002459 }
2460}
2461
Serhiy Storchaka576def02017-03-30 09:47:31 +03002462LOCAL(int)
2463treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002464{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002465 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002466
Serhiy Storchaka576def02017-03-30 09:47:31 +03002467 if (!self->data) {
2468 return 0;
2469 }
2470
2471 if (self->this == element) {
2472 _Py_IDENTIFIER(text);
2473 return treebuilder_set_element_text_or_tail(
2474 element, &self->data,
2475 &((ElementObject *) element)->text, &PyId_text);
2476 }
2477 else {
2478 _Py_IDENTIFIER(tail);
2479 return treebuilder_set_element_text_or_tail(
2480 element, &self->data,
2481 &((ElementObject *) element)->tail, &PyId_tail);
2482 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002483}
2484
2485static int
2486treebuilder_add_subelement(PyObject *element, PyObject *child)
2487{
2488 _Py_IDENTIFIER(append);
2489 if (Element_CheckExact(element)) {
2490 ElementObject *elem = (ElementObject *) element;
2491 return element_add_subelement(elem, child);
2492 }
2493 else {
2494 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002495 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002496 if (res == NULL)
2497 return -1;
2498 Py_DECREF(res);
2499 return 0;
2500 }
2501}
2502
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002503LOCAL(int)
2504treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2505 PyObject *node)
2506{
2507 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002508 PyObject *res;
2509 PyObject *event = PyTuple_Pack(2, action, node);
2510 if (event == NULL)
2511 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002512 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002513 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002514 if (res == NULL)
2515 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002516 Py_DECREF(res);
2517 }
2518 return 0;
2519}
2520
Antoine Pitrouee329312012-10-04 19:53:29 +02002521/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522/* handlers */
2523
2524LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2526 PyObject* attrib)
2527{
2528 PyObject* node;
2529 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002530 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531
Serhiy Storchaka576def02017-03-30 09:47:31 +03002532 if (treebuilder_flush_data(self) < 0) {
2533 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 }
2535
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002536 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002537 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002538 } else if (attrib == Py_None) {
2539 attrib = PyDict_New();
2540 if (!attrib)
2541 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002542 node = PyObject_CallFunctionObjArgs(self->element_factory,
2543 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002544 Py_DECREF(attrib);
2545 }
2546 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002547 node = PyObject_CallFunctionObjArgs(self->element_factory,
2548 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002549 }
2550 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002552 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553
Antoine Pitrouee329312012-10-04 19:53:29 +02002554 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555
2556 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002557 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002558 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559 } else {
2560 if (self->root) {
2561 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002562 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 "multiple elements on top level"
2564 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002565 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566 }
2567 Py_INCREF(node);
2568 self->root = node;
2569 }
2570
2571 if (self->index < PyList_GET_SIZE(self->stack)) {
2572 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002573 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 Py_INCREF(this);
2575 } else {
2576 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002577 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 }
2579 self->index++;
2580
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002582 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002584 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002585
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002586 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2587 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002588
2589 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002590
2591 error:
2592 Py_DECREF(node);
2593 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594}
2595
2596LOCAL(PyObject*)
2597treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2598{
2599 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002600 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002601 /* ignore calls to data before the first call to start */
2602 Py_RETURN_NONE;
2603 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 /* store the first item as is */
2605 Py_INCREF(data); self->data = data;
2606 } else {
2607 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002608 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2609 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002610 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611 /* expat often generates single character data sections; handle
2612 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002613 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2614 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002616 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 } else if (PyList_CheckExact(self->data)) {
2618 if (PyList_Append(self->data, data) < 0)
2619 return NULL;
2620 } else {
2621 PyObject* list = PyList_New(2);
2622 if (!list)
2623 return NULL;
2624 PyList_SET_ITEM(list, 0, self->data);
2625 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2626 self->data = list;
2627 }
2628 }
2629
2630 Py_RETURN_NONE;
2631}
2632
2633LOCAL(PyObject*)
2634treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2635{
2636 PyObject* item;
2637
Serhiy Storchaka576def02017-03-30 09:47:31 +03002638 if (treebuilder_flush_data(self) < 0) {
2639 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640 }
2641
2642 if (self->index == 0) {
2643 PyErr_SetString(
2644 PyExc_IndexError,
2645 "pop from empty stack"
2646 );
2647 return NULL;
2648 }
2649
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002650 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002651 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002652 self->index--;
2653 self->this = PyList_GET_ITEM(self->stack, self->index);
2654 Py_INCREF(self->this);
2655 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002657 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2658 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659
2660 Py_INCREF(self->last);
2661 return (PyObject*) self->last;
2662}
2663
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664/* -------------------------------------------------------------------- */
2665/* methods (in alphabetical order) */
2666
Serhiy Storchakacb985562015-05-04 15:32:48 +03002667/*[clinic input]
2668_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669
Serhiy Storchakacb985562015-05-04 15:32:48 +03002670 data: object
2671 /
2672
2673[clinic start generated code]*/
2674
2675static PyObject *
2676_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2677/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2678{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679 return treebuilder_handle_data(self, data);
2680}
2681
Serhiy Storchakacb985562015-05-04 15:32:48 +03002682/*[clinic input]
2683_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684
Serhiy Storchakacb985562015-05-04 15:32:48 +03002685 tag: object
2686 /
2687
2688[clinic start generated code]*/
2689
2690static PyObject *
2691_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2692/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2693{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694 return treebuilder_handle_end(self, tag);
2695}
2696
2697LOCAL(PyObject*)
2698treebuilder_done(TreeBuilderObject* self)
2699{
2700 PyObject* res;
2701
2702 /* FIXME: check stack size? */
2703
2704 if (self->root)
2705 res = self->root;
2706 else
2707 res = Py_None;
2708
2709 Py_INCREF(res);
2710 return res;
2711}
2712
Serhiy Storchakacb985562015-05-04 15:32:48 +03002713/*[clinic input]
2714_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715
Serhiy Storchakacb985562015-05-04 15:32:48 +03002716[clinic start generated code]*/
2717
2718static PyObject *
2719_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2720/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2721{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722 return treebuilder_done(self);
2723}
2724
Serhiy Storchakacb985562015-05-04 15:32:48 +03002725/*[clinic input]
2726_elementtree.TreeBuilder.start
2727
2728 tag: object
2729 attrs: object = None
2730 /
2731
2732[clinic start generated code]*/
2733
2734static PyObject *
2735_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2736 PyObject *attrs)
2737/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002739 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740}
2741
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742/* ==================================================================== */
2743/* the expat interface */
2744
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002747
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 *
 * EXPAT(name) selects the entry called `name` from that table, e.g.
 * EXPAT(ParserFree)(parser).
 */
static struct PyExpat_CAPI *expat_capi;
#define EXPAT(name) (expat_capi->name)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753
Eli Bendersky52467b12012-06-01 07:13:08 +03002754static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2755 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2756
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757typedef struct {
2758 PyObject_HEAD
2759
2760 XML_Parser parser;
2761
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002762 PyObject *target;
2763 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002765 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002767 PyObject *handle_start;
2768 PyObject *handle_data;
2769 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002771 PyObject *handle_comment;
2772 PyObject *handle_pi;
2773 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002775 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002776
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777} XMLParserObject;
2778
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002779static PyObject*
Serhiy Storchaka6969eaf2017-07-03 21:20:15 +03002780_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject **args, Py_ssize_t nargs);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002781static PyObject *
2782_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2783 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002784
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785/* helpers */
2786
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787LOCAL(PyObject*)
2788makeuniversal(XMLParserObject* self, const char* string)
2789{
2790 /* convert a UTF-8 tag/attribute name from the expat parser
2791 to a universal name string */
2792
Antoine Pitrouc1948842012-10-01 23:40:37 +02002793 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 PyObject* key;
2795 PyObject* value;
2796
2797 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002798 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799 if (!key)
2800 return NULL;
2801
2802 value = PyDict_GetItem(self->names, key);
2803
2804 if (value) {
2805 Py_INCREF(value);
2806 } else {
2807 /* new name. convert to universal name, and decode as
2808 necessary */
2809
2810 PyObject* tag;
2811 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002812 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813
2814 /* look for namespace separator */
2815 for (i = 0; i < size; i++)
2816 if (string[i] == '}')
2817 break;
2818 if (i != size) {
2819 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002820 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002821 if (tag == NULL) {
2822 Py_DECREF(key);
2823 return NULL;
2824 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002825 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002826 p[0] = '{';
2827 memcpy(p+1, string, size);
2828 size++;
2829 } else {
2830 /* plain name; use key as tag */
2831 Py_INCREF(key);
2832 tag = key;
2833 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002834
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002836 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002837 value = PyUnicode_DecodeUTF8(p, size, "strict");
2838 Py_DECREF(tag);
2839 if (!value) {
2840 Py_DECREF(key);
2841 return NULL;
2842 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002843
2844 /* add to names dictionary */
2845 if (PyDict_SetItem(self->names, key, value) < 0) {
2846 Py_DECREF(key);
2847 Py_DECREF(value);
2848 return NULL;
2849 }
2850 }
2851
2852 Py_DECREF(key);
2853 return value;
2854}
2855
Eli Bendersky5b77d812012-03-16 08:20:05 +02002856/* Set the ParseError exception with the given parameters.
2857 * If message is not NULL, it's used as the error string. Otherwise, the
2858 * message string is the default for the given error_code.
2859*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002861expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2862 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002863{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002864 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002865 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002867 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002868 message ? message : EXPAT(ErrorString)(error_code),
2869 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002870 if (errmsg == NULL)
2871 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002872
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002873 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002874 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002875 if (!error)
2876 return;
2877
Eli Bendersky5b77d812012-03-16 08:20:05 +02002878 /* Add code and position attributes */
2879 code = PyLong_FromLong((long)error_code);
2880 if (!code) {
2881 Py_DECREF(error);
2882 return;
2883 }
2884 if (PyObject_SetAttrString(error, "code", code) == -1) {
2885 Py_DECREF(error);
2886 Py_DECREF(code);
2887 return;
2888 }
2889 Py_DECREF(code);
2890
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002891 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002892 if (!position) {
2893 Py_DECREF(error);
2894 return;
2895 }
2896 if (PyObject_SetAttrString(error, "position", position) == -1) {
2897 Py_DECREF(error);
2898 Py_DECREF(position);
2899 return;
2900 }
2901 Py_DECREF(position);
2902
Eli Bendersky532d03e2013-08-10 08:00:39 -07002903 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002904 Py_DECREF(error);
2905}
2906
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907/* -------------------------------------------------------------------- */
2908/* handlers */
2909
2910static void
2911expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2912 int data_len)
2913{
2914 PyObject* key;
2915 PyObject* value;
2916 PyObject* res;
2917
2918 if (data_len < 2 || data_in[0] != '&')
2919 return;
2920
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002921 if (PyErr_Occurred())
2922 return;
2923
Neal Norwitz0269b912007-08-08 06:56:02 +00002924 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 if (!key)
2926 return;
2927
2928 value = PyDict_GetItem(self->entity, key);
2929
2930 if (value) {
2931 if (TreeBuilder_CheckExact(self->target))
2932 res = treebuilder_handle_data(
2933 (TreeBuilderObject*) self->target, value
2934 );
2935 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002936 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937 else
2938 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002940 } else if (!PyErr_Occurred()) {
2941 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002942 char message[128] = "undefined entity ";
2943 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002944 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002945 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002947 EXPAT(GetErrorColumnNumber)(self->parser),
2948 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 );
2950 }
2951
2952 Py_DECREF(key);
2953}
2954
2955static void
2956expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2957 const XML_Char **attrib_in)
2958{
2959 PyObject* res;
2960 PyObject* tag;
2961 PyObject* attrib;
2962 int ok;
2963
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002964 if (PyErr_Occurred())
2965 return;
2966
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 /* tag name */
2968 tag = makeuniversal(self, tag_in);
2969 if (!tag)
2970 return; /* parser will look for errors */
2971
2972 /* attributes */
2973 if (attrib_in[0]) {
2974 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002975 if (!attrib) {
2976 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002977 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002978 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 while (attrib_in[0] && attrib_in[1]) {
2980 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002981 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982 if (!key || !value) {
2983 Py_XDECREF(value);
2984 Py_XDECREF(key);
2985 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002986 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 return;
2988 }
2989 ok = PyDict_SetItem(attrib, key, value);
2990 Py_DECREF(value);
2991 Py_DECREF(key);
2992 if (ok < 0) {
2993 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002994 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 return;
2996 }
2997 attrib_in += 2;
2998 }
2999 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003000 Py_INCREF(Py_None);
3001 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003002 }
3003
3004 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003005 /* shortcut */
3006 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3007 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003008 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003009 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003010 if (attrib == Py_None) {
3011 Py_DECREF(attrib);
3012 attrib = PyDict_New();
3013 if (!attrib) {
3014 Py_DECREF(tag);
3015 return;
3016 }
3017 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003018 res = PyObject_CallFunctionObjArgs(self->handle_start,
3019 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003020 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003021 res = NULL;
3022
3023 Py_DECREF(tag);
3024 Py_DECREF(attrib);
3025
3026 Py_XDECREF(res);
3027}
3028
3029static void
3030expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3031 int data_len)
3032{
3033 PyObject* data;
3034 PyObject* res;
3035
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003036 if (PyErr_Occurred())
3037 return;
3038
Neal Norwitz0269b912007-08-08 06:56:02 +00003039 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003040 if (!data)
3041 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003042
3043 if (TreeBuilder_CheckExact(self->target))
3044 /* shortcut */
3045 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3046 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003047 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003048 else
3049 res = NULL;
3050
3051 Py_DECREF(data);
3052
3053 Py_XDECREF(res);
3054}
3055
3056static void
3057expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3058{
3059 PyObject* tag;
3060 PyObject* res = NULL;
3061
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003062 if (PyErr_Occurred())
3063 return;
3064
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065 if (TreeBuilder_CheckExact(self->target))
3066 /* shortcut */
3067 /* the standard tree builder doesn't look at the end tag */
3068 res = treebuilder_handle_end(
3069 (TreeBuilderObject*) self->target, Py_None
3070 );
3071 else if (self->handle_end) {
3072 tag = makeuniversal(self, tag_in);
3073 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003074 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075 Py_DECREF(tag);
3076 }
3077 }
3078
3079 Py_XDECREF(res);
3080}
3081
3082static void
3083expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3084 const XML_Char *uri)
3085{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003086 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3087 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003088
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003089 if (PyErr_Occurred())
3090 return;
3091
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003092 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003093 return;
3094
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003095 if (!uri)
3096 uri = "";
3097 if (!prefix)
3098 prefix = "";
3099
3100 parcel = Py_BuildValue("ss", prefix, uri);
3101 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003102 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003103 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3104 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105}
3106
3107static void
3108expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3109{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003110 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3111
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003112 if (PyErr_Occurred())
3113 return;
3114
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003115 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003116 return;
3117
3118 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119}
3120
3121static void
3122expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3123{
3124 PyObject* comment;
3125 PyObject* res;
3126
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003127 if (PyErr_Occurred())
3128 return;
3129
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003130 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003131 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003133 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3134 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003135 Py_XDECREF(res);
3136 Py_DECREF(comment);
3137 }
3138 }
3139}
3140
Eli Bendersky45839902013-01-13 05:14:47 -08003141static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003142expat_start_doctype_handler(XMLParserObject *self,
3143 const XML_Char *doctype_name,
3144 const XML_Char *sysid,
3145 const XML_Char *pubid,
3146 int has_internal_subset)
3147{
3148 PyObject *self_pyobj = (PyObject *)self;
3149 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3150 PyObject *parser_doctype = NULL;
3151 PyObject *res = NULL;
3152
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003153 if (PyErr_Occurred())
3154 return;
3155
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003156 doctype_name_obj = makeuniversal(self, doctype_name);
3157 if (!doctype_name_obj)
3158 return;
3159
3160 if (sysid) {
3161 sysid_obj = makeuniversal(self, sysid);
3162 if (!sysid_obj) {
3163 Py_DECREF(doctype_name_obj);
3164 return;
3165 }
3166 } else {
3167 Py_INCREF(Py_None);
3168 sysid_obj = Py_None;
3169 }
3170
3171 if (pubid) {
3172 pubid_obj = makeuniversal(self, pubid);
3173 if (!pubid_obj) {
3174 Py_DECREF(doctype_name_obj);
3175 Py_DECREF(sysid_obj);
3176 return;
3177 }
3178 } else {
3179 Py_INCREF(Py_None);
3180 pubid_obj = Py_None;
3181 }
3182
3183 /* If the target has a handler for doctype, call it. */
3184 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003185 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3186 doctype_name_obj, pubid_obj,
3187 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003188 Py_CLEAR(res);
3189 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003190 else {
3191 /* Now see if the parser itself has a doctype method. If yes and it's
3192 * a custom method, call it but warn about deprecation. If it's only
3193 * the vanilla XMLParser method, do nothing.
3194 */
3195 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3196 if (parser_doctype &&
3197 !(PyCFunction_Check(parser_doctype) &&
3198 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3199 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003200 (PyCFunction) _elementtree_XMLParser_doctype)) {
3201 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3202 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003203 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003204 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003205 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003206 res = PyObject_CallFunctionObjArgs(parser_doctype,
3207 doctype_name_obj, pubid_obj,
3208 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003209 Py_CLEAR(res);
3210 }
3211 }
3212
3213clear:
3214 Py_XDECREF(parser_doctype);
3215 Py_DECREF(doctype_name_obj);
3216 Py_DECREF(pubid_obj);
3217 Py_DECREF(sysid_obj);
3218}
3219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220static void
3221expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3222 const XML_Char* data_in)
3223{
3224 PyObject* target;
3225 PyObject* data;
3226 PyObject* res;
3227
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003228 if (PyErr_Occurred())
3229 return;
3230
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003232 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3233 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003235 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3236 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003237 Py_XDECREF(res);
3238 Py_DECREF(data);
3239 Py_DECREF(target);
3240 } else {
3241 Py_XDECREF(data);
3242 Py_XDECREF(target);
3243 }
3244 }
3245}
3246
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248
Eli Bendersky52467b12012-06-01 07:13:08 +03003249static PyObject *
3250xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251{
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3253 if (self) {
3254 self->parser = NULL;
3255 self->target = self->entity = self->names = NULL;
3256 self->handle_start = self->handle_data = self->handle_end = NULL;
3257 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003258 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003260 return (PyObject *)self;
3261}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262
scoderc8d8e152017-09-14 22:00:03 +02003263static int
3264ignore_attribute_error(PyObject *value)
3265{
3266 if (value == NULL) {
3267 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3268 return -1;
3269 }
3270 PyErr_Clear();
3271 }
3272 return 0;
3273}
3274
Serhiy Storchakacb985562015-05-04 15:32:48 +03003275/*[clinic input]
3276_elementtree.XMLParser.__init__
3277
3278 html: object = NULL
3279 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003280 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003281
3282[clinic start generated code]*/
3283
Eli Bendersky52467b12012-06-01 07:13:08 +03003284static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003285_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3286 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003287/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003288{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003289 if (html != NULL) {
3290 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3291 "The html argument of XMLParser() is deprecated",
3292 1) < 0) {
3293 return -1;
3294 }
3295 }
3296
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 self->entity = PyDict_New();
3298 if (!self->entity)
3299 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 self->names = PyDict_New();
3302 if (!self->names) {
3303 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003304 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003306
Serhiy Storchakacb985562015-05-04 15:32:48 +03003307 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3308 if (!self->parser) {
3309 Py_CLEAR(self->entity);
3310 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003312 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003313 }
3314
Eli Bendersky52467b12012-06-01 07:13:08 +03003315 if (target) {
3316 Py_INCREF(target);
3317 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003318 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 Py_CLEAR(self->entity);
3321 Py_CLEAR(self->names);
3322 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003323 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003325 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327
Serhiy Storchakacb985562015-05-04 15:32:48 +03003328 self->handle_start = PyObject_GetAttrString(target, "start");
scoderc8d8e152017-09-14 22:00:03 +02003329 if (ignore_attribute_error(self->handle_start)) {
3330 return -1;
3331 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003332 self->handle_data = PyObject_GetAttrString(target, "data");
scoderc8d8e152017-09-14 22:00:03 +02003333 if (ignore_attribute_error(self->handle_data)) {
3334 return -1;
3335 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003336 self->handle_end = PyObject_GetAttrString(target, "end");
scoderc8d8e152017-09-14 22:00:03 +02003337 if (ignore_attribute_error(self->handle_end)) {
3338 return -1;
3339 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003340 self->handle_comment = PyObject_GetAttrString(target, "comment");
scoderc8d8e152017-09-14 22:00:03 +02003341 if (ignore_attribute_error(self->handle_comment)) {
3342 return -1;
3343 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003344 self->handle_pi = PyObject_GetAttrString(target, "pi");
scoderc8d8e152017-09-14 22:00:03 +02003345 if (ignore_attribute_error(self->handle_pi)) {
3346 return -1;
3347 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003348 self->handle_close = PyObject_GetAttrString(target, "close");
scoderc8d8e152017-09-14 22:00:03 +02003349 if (ignore_attribute_error(self->handle_close)) {
3350 return -1;
3351 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003352 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
scoderc8d8e152017-09-14 22:00:03 +02003353 if (ignore_attribute_error(self->handle_doctype)) {
3354 return -1;
3355 }
Eli Bendersky45839902013-01-13 05:14:47 -08003356
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003358 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003360 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003361 (XML_StartElementHandler) expat_start_handler,
3362 (XML_EndElementHandler) expat_end_handler
3363 );
3364 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003365 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366 (XML_DefaultHandler) expat_default_handler
3367 );
3368 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003369 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 (XML_CharacterDataHandler) expat_data_handler
3371 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003372 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003374 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375 (XML_CommentHandler) expat_comment_handler
3376 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003379 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 (XML_ProcessingInstructionHandler) expat_pi_handler
3381 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003382 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003383 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003384 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3385 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003387 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003388 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390
Eli Bendersky52467b12012-06-01 07:13:08 +03003391 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392}
3393
Eli Bendersky52467b12012-06-01 07:13:08 +03003394static int
3395xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3396{
3397 Py_VISIT(self->handle_close);
3398 Py_VISIT(self->handle_pi);
3399 Py_VISIT(self->handle_comment);
3400 Py_VISIT(self->handle_end);
3401 Py_VISIT(self->handle_data);
3402 Py_VISIT(self->handle_start);
3403
3404 Py_VISIT(self->target);
3405 Py_VISIT(self->entity);
3406 Py_VISIT(self->names);
3407
3408 return 0;
3409}
3410
3411static int
3412xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413{
Victor Stinnere727d412017-09-18 05:29:37 -07003414 if (self->parser != NULL) {
3415 XML_Parser parser = self->parser;
3416 self->parser = NULL;
3417 EXPAT(ParserFree)(parser);
3418 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419
Antoine Pitrouc1948842012-10-01 23:40:37 +02003420 Py_CLEAR(self->handle_close);
3421 Py_CLEAR(self->handle_pi);
3422 Py_CLEAR(self->handle_comment);
3423 Py_CLEAR(self->handle_end);
3424 Py_CLEAR(self->handle_data);
3425 Py_CLEAR(self->handle_start);
3426 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427
Antoine Pitrouc1948842012-10-01 23:40:37 +02003428 Py_CLEAR(self->target);
3429 Py_CLEAR(self->entity);
3430 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431
Eli Bendersky52467b12012-06-01 07:13:08 +03003432 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433}
3434
Eli Bendersky52467b12012-06-01 07:13:08 +03003435static void
3436xmlparser_dealloc(XMLParserObject* self)
3437{
3438 PyObject_GC_UnTrack(self);
3439 xmlparser_gc_clear(self);
3440 Py_TYPE(self)->tp_free((PyObject *)self);
3441}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003442
3443LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003444expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003445{
3446 int ok;
3447
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003448 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003449 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3450
3451 if (PyErr_Occurred())
3452 return NULL;
3453
3454 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003455 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003456 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003457 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003458 EXPAT(GetErrorColumnNumber)(self->parser),
3459 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460 );
3461 return NULL;
3462 }
3463
3464 Py_RETURN_NONE;
3465}
3466
Serhiy Storchakacb985562015-05-04 15:32:48 +03003467/*[clinic input]
3468_elementtree.XMLParser.close
3469
3470[clinic start generated code]*/
3471
3472static PyObject *
3473_elementtree_XMLParser_close_impl(XMLParserObject *self)
3474/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475{
3476 /* end feeding data to parser */
3477
3478 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003480 if (!res)
3481 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003483 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003484 Py_DECREF(res);
3485 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003486 }
3487 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003488 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003489 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003490 }
3491 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003492 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003493 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494}
3495
Serhiy Storchakacb985562015-05-04 15:32:48 +03003496/*[clinic input]
3497_elementtree.XMLParser.feed
3498
3499 data: object
3500 /
3501
3502[clinic start generated code]*/
3503
3504static PyObject *
3505_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3506/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507{
3508 /* feed data to parser */
3509
Serhiy Storchakacb985562015-05-04 15:32:48 +03003510 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003511 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003512 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3513 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003514 return NULL;
3515 if (data_len > INT_MAX) {
3516 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3517 return NULL;
3518 }
3519 /* Explicitly set UTF-8 encoding. Return code ignored. */
3520 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003521 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003522 }
3523 else {
3524 Py_buffer view;
3525 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003526 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003527 return NULL;
3528 if (view.len > INT_MAX) {
3529 PyBuffer_Release(&view);
3530 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3531 return NULL;
3532 }
3533 res = expat_parse(self, view.buf, (int)view.len, 0);
3534 PyBuffer_Release(&view);
3535 return res;
3536 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003537}
3538
Serhiy Storchakacb985562015-05-04 15:32:48 +03003539/*[clinic input]
3540_elementtree.XMLParser._parse_whole
3541
3542 file: object
3543 /
3544
3545[clinic start generated code]*/
3546
3547static PyObject *
3548_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3549/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550{
Eli Benderskya3699232013-05-19 18:47:23 -07003551 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 PyObject* reader;
3553 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003554 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003555 PyObject* res;
3556
Serhiy Storchakacb985562015-05-04 15:32:48 +03003557 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558 if (!reader)
3559 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003560
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003561 /* read from open file object */
3562 for (;;) {
3563
3564 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3565
3566 if (!buffer) {
3567 /* read failed (e.g. due to KeyboardInterrupt) */
3568 Py_DECREF(reader);
3569 return NULL;
3570 }
3571
Eli Benderskyf996e772012-03-16 05:53:30 +02003572 if (PyUnicode_CheckExact(buffer)) {
3573 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003574 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003575 Py_DECREF(buffer);
3576 break;
3577 }
3578 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003579 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003580 if (!temp) {
3581 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003582 Py_DECREF(reader);
3583 return NULL;
3584 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003585 buffer = temp;
3586 }
3587 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588 Py_DECREF(buffer);
3589 break;
3590 }
3591
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003592 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3593 Py_DECREF(buffer);
3594 Py_DECREF(reader);
3595 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3596 return NULL;
3597 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003599 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600 );
3601
3602 Py_DECREF(buffer);
3603
3604 if (!res) {
3605 Py_DECREF(reader);
3606 return NULL;
3607 }
3608 Py_DECREF(res);
3609
3610 }
3611
3612 Py_DECREF(reader);
3613
3614 res = expat_parse(self, "", 0, 1);
3615
3616 if (res && TreeBuilder_CheckExact(self->target)) {
3617 Py_DECREF(res);
3618 return treebuilder_done((TreeBuilderObject*) self->target);
3619 }
3620
3621 return res;
3622}
3623
Serhiy Storchakacb985562015-05-04 15:32:48 +03003624/*[clinic input]
3625_elementtree.XMLParser.doctype
3626
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003627 name: object
3628 pubid: object
3629 system: object
3630 /
3631
Serhiy Storchakacb985562015-05-04 15:32:48 +03003632[clinic start generated code]*/
3633
3634static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003635_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3636 PyObject *pubid, PyObject *system)
3637/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003638{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003639 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3640 "This method of XMLParser is deprecated. Define"
3641 " doctype() method on the TreeBuilder target.",
3642 1) < 0) {
3643 return NULL;
3644 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003645 Py_RETURN_NONE;
3646}
3647
Serhiy Storchakacb985562015-05-04 15:32:48 +03003648/*[clinic input]
3649_elementtree.XMLParser._setevents
3650
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003651 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003652 events_to_report: object = None
3653 /
3654
3655[clinic start generated code]*/
3656
3657static PyObject *
3658_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3659 PyObject *events_queue,
3660 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003661/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662{
3663 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003664 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003665 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003666 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667
3668 if (!TreeBuilder_CheckExact(self->target)) {
3669 PyErr_SetString(
3670 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003671 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003672 "targets"
3673 );
3674 return NULL;
3675 }
3676
3677 target = (TreeBuilderObject*) self->target;
3678
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003679 events_append = PyObject_GetAttrString(events_queue, "append");
3680 if (events_append == NULL)
3681 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003682 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683
3684 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003685 Py_CLEAR(target->start_event_obj);
3686 Py_CLEAR(target->end_event_obj);
3687 Py_CLEAR(target->start_ns_event_obj);
3688 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003689
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003690 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003691 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003692 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003693 Py_RETURN_NONE;
3694 }
3695
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003696 if (!(events_seq = PySequence_Fast(events_to_report,
3697 "events must be a sequence"))) {
3698 return NULL;
3699 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003701 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003702 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003703 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003704 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003705 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003706 } else if (PyBytes_Check(event_name_obj)) {
3707 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003708 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003709 if (event_name == NULL) {
3710 Py_DECREF(events_seq);
3711 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3712 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003713 }
3714
3715 Py_INCREF(event_name_obj);
3716 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003717 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003718 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003719 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003720 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003721 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722 EXPAT(SetNamespaceDeclHandler)(
3723 self->parser,
3724 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3725 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3726 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003727 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003728 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003729 EXPAT(SetNamespaceDeclHandler)(
3730 self->parser,
3731 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3732 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3733 );
3734 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003735 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003736 Py_DECREF(events_seq);
3737 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738 return NULL;
3739 }
3740 }
3741
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003742 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003743 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003744}
3745
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003746static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003747xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003748{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003749 if (PyUnicode_Check(nameobj)) {
3750 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003751 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003752 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003753 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003754 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003755 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003756 return PyUnicode_FromFormat(
3757 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003758 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003759 }
3760 else
3761 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003762
Alexander Belopolskye239d232010-12-08 23:31:48 +00003763 Py_INCREF(res);
3764 return res;
3765 }
3766 generic:
3767 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003768}
3769
Serhiy Storchakacb985562015-05-04 15:32:48 +03003770#include "clinic/_elementtree.c.h"
3771
3772static PyMethodDef element_methods[] = {
3773
3774 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3775
3776 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3777 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3778
3779 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3780 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3781 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3782
3783 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3784 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3785 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3786 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3787
3788 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3789 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3790 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3791
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003792 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003793 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3794
3795 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3796 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3797
3798 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3799
3800 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3801 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3802 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3803 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3804 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3805
3806 {NULL, NULL}
3807};
3808
3809static PyMappingMethods element_as_mapping = {
3810 (lenfunc) element_length,
3811 (binaryfunc) element_subscr,
3812 (objobjargproc) element_ass_subscr,
3813};
3814
Serhiy Storchakadde08152015-11-25 15:28:13 +02003815static PyGetSetDef element_getsetlist[] = {
3816 {"tag",
3817 (getter)element_tag_getter,
3818 (setter)element_tag_setter,
3819 "A string identifying what kind of data this element represents"},
3820 {"text",
3821 (getter)element_text_getter,
3822 (setter)element_text_setter,
3823 "A string of text directly after the start tag, or None"},
3824 {"tail",
3825 (getter)element_tail_getter,
3826 (setter)element_tail_setter,
3827 "A string of text directly after the end tag, or None"},
3828 {"attrib",
3829 (getter)element_attrib_getter,
3830 (setter)element_attrib_setter,
3831 "A dictionary containing the element's attributes"},
3832 {NULL},
3833};
3834
Serhiy Storchakacb985562015-05-04 15:32:48 +03003835static PyTypeObject Element_Type = {
3836 PyVarObject_HEAD_INIT(NULL, 0)
3837 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3838 /* methods */
3839 (destructor)element_dealloc, /* tp_dealloc */
3840 0, /* tp_print */
3841 0, /* tp_getattr */
3842 0, /* tp_setattr */
3843 0, /* tp_reserved */
3844 (reprfunc)element_repr, /* tp_repr */
3845 0, /* tp_as_number */
3846 &element_as_sequence, /* tp_as_sequence */
3847 &element_as_mapping, /* tp_as_mapping */
3848 0, /* tp_hash */
3849 0, /* tp_call */
3850 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003851 PyObject_GenericGetAttr, /* tp_getattro */
3852 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003853 0, /* tp_as_buffer */
3854 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3855 /* tp_flags */
3856 0, /* tp_doc */
3857 (traverseproc)element_gc_traverse, /* tp_traverse */
3858 (inquiry)element_gc_clear, /* tp_clear */
3859 0, /* tp_richcompare */
3860 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3861 0, /* tp_iter */
3862 0, /* tp_iternext */
3863 element_methods, /* tp_methods */
3864 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003865 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003866 0, /* tp_base */
3867 0, /* tp_dict */
3868 0, /* tp_descr_get */
3869 0, /* tp_descr_set */
3870 0, /* tp_dictoffset */
3871 (initproc)element_init, /* tp_init */
3872 PyType_GenericAlloc, /* tp_alloc */
3873 element_new, /* tp_new */
3874 0, /* tp_free */
3875};
3876
3877static PyMethodDef treebuilder_methods[] = {
3878 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3879 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3880 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3881 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3882 {NULL, NULL}
3883};
3884
3885static PyTypeObject TreeBuilder_Type = {
3886 PyVarObject_HEAD_INIT(NULL, 0)
3887 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3888 /* methods */
3889 (destructor)treebuilder_dealloc, /* tp_dealloc */
3890 0, /* tp_print */
3891 0, /* tp_getattr */
3892 0, /* tp_setattr */
3893 0, /* tp_reserved */
3894 0, /* tp_repr */
3895 0, /* tp_as_number */
3896 0, /* tp_as_sequence */
3897 0, /* tp_as_mapping */
3898 0, /* tp_hash */
3899 0, /* tp_call */
3900 0, /* tp_str */
3901 0, /* tp_getattro */
3902 0, /* tp_setattro */
3903 0, /* tp_as_buffer */
3904 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3905 /* tp_flags */
3906 0, /* tp_doc */
3907 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3908 (inquiry)treebuilder_gc_clear, /* tp_clear */
3909 0, /* tp_richcompare */
3910 0, /* tp_weaklistoffset */
3911 0, /* tp_iter */
3912 0, /* tp_iternext */
3913 treebuilder_methods, /* tp_methods */
3914 0, /* tp_members */
3915 0, /* tp_getset */
3916 0, /* tp_base */
3917 0, /* tp_dict */
3918 0, /* tp_descr_get */
3919 0, /* tp_descr_set */
3920 0, /* tp_dictoffset */
3921 _elementtree_TreeBuilder___init__, /* tp_init */
3922 PyType_GenericAlloc, /* tp_alloc */
3923 treebuilder_new, /* tp_new */
3924 0, /* tp_free */
3925};
3926
3927static PyMethodDef xmlparser_methods[] = {
3928 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3929 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3930 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3931 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3932 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3933 {NULL, NULL}
3934};
3935
Neal Norwitz227b5332006-03-22 09:28:35 +00003936static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003937 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003938 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003939 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003940 (destructor)xmlparser_dealloc, /* tp_dealloc */
3941 0, /* tp_print */
3942 0, /* tp_getattr */
3943 0, /* tp_setattr */
3944 0, /* tp_reserved */
3945 0, /* tp_repr */
3946 0, /* tp_as_number */
3947 0, /* tp_as_sequence */
3948 0, /* tp_as_mapping */
3949 0, /* tp_hash */
3950 0, /* tp_call */
3951 0, /* tp_str */
3952 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3953 0, /* tp_setattro */
3954 0, /* tp_as_buffer */
3955 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3956 /* tp_flags */
3957 0, /* tp_doc */
3958 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3959 (inquiry)xmlparser_gc_clear, /* tp_clear */
3960 0, /* tp_richcompare */
3961 0, /* tp_weaklistoffset */
3962 0, /* tp_iter */
3963 0, /* tp_iternext */
3964 xmlparser_methods, /* tp_methods */
3965 0, /* tp_members */
3966 0, /* tp_getset */
3967 0, /* tp_base */
3968 0, /* tp_dict */
3969 0, /* tp_descr_get */
3970 0, /* tp_descr_set */
3971 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003972 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003973 PyType_GenericAlloc, /* tp_alloc */
3974 xmlparser_new, /* tp_new */
3975 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003976};
3977
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003978/* ==================================================================== */
3979/* python module interface */
3980
3981static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003982 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003983 {NULL, NULL}
3984};
3985
Martin v. Löwis1a214512008-06-11 05:26:20 +00003986
Eli Bendersky532d03e2013-08-10 08:00:39 -07003987static struct PyModuleDef elementtreemodule = {
3988 PyModuleDef_HEAD_INIT,
3989 "_elementtree",
3990 NULL,
3991 sizeof(elementtreestate),
3992 _functions,
3993 NULL,
3994 elementtree_traverse,
3995 elementtree_clear,
3996 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003997};
3998
Neal Norwitzf6657e62006-12-28 04:47:50 +00003999PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00004000PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004001{
Eli Bendersky64d11e62012-06-15 07:42:50 +03004002 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004003 elementtreestate *st;
4004
4005 m = PyState_FindModule(&elementtreemodule);
4006 if (m) {
4007 Py_INCREF(m);
4008 return m;
4009 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004010
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004011 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02004012 if (PyType_Ready(&ElementIter_Type) < 0)
4013 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004014 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004015 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004016 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004017 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00004018 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00004019 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004020
Eli Bendersky532d03e2013-08-10 08:00:39 -07004021 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00004022 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00004023 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004024 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00004025
Eli Bendersky828efde2012-04-05 05:40:58 +03004026 if (!(temp = PyImport_ImportModule("copy")))
4027 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07004028 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03004029 Py_XDECREF(temp);
4030
Victor Stinnerb136f112017-07-10 22:28:02 +02004031 if (st->deepcopy_obj == NULL) {
4032 return NULL;
4033 }
4034
4035 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004036 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004037 return NULL;
4038
Eli Bendersky20d41742012-06-01 09:48:37 +03004039 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004040 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4041 if (expat_capi) {
4042 /* check that it's usable */
4043 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004044 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004045 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4046 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004047 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004048 PyErr_SetString(PyExc_ImportError,
4049 "pyexpat version is incompatible");
4050 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004051 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004052 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004053 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004054 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004055
Eli Bendersky532d03e2013-08-10 08:00:39 -07004056 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004057 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004058 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004059 Py_INCREF(st->parseerror_obj);
4060 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004061
Eli Bendersky092af1f2012-03-04 07:14:03 +02004062 Py_INCREF((PyObject *)&Element_Type);
4063 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4064
Eli Bendersky58d548d2012-05-29 15:45:16 +03004065 Py_INCREF((PyObject *)&TreeBuilder_Type);
4066 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4067
Eli Bendersky52467b12012-06-01 07:13:08 +03004068 Py_INCREF((PyObject *)&XMLParser_Type);
4069 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004070
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004071 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004072}