blob: 857005a2a9b8ad84c223d1d0bc254b1c6584fe81 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the "#if 0" to "#if 1" to keep a
   running byte count in `memory` and print it, together with `comment`,
   on every ALLOC/RELEASE.  In the default configuration both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-local function returning
   `type`.  Under MSVC it additionally requests inlining and the
   __fastcall calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)       - the flag, i.e. the lowest bit of the pointer value
   JOIN_SET(p, flag) - p with its lowest bit cleared and then OR'ed with flag
   JOIN_OBJ(p)       - p with its lowest bit cleared, as a PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
/* Types defined by this extension.  These are forward declarations so the
 * code below can take the address of each type object before its
 * definition appears. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
81
82
/* Per-module state; PEP 3121.
 * All three members are owned references that are visited by
 * elementtree_traverse() and released by elementtree_clear().
 */
typedef struct {
    PyObject *parseerror_obj;   /* NOTE(review): presumably the ParseError
                                   exception object - confirm at the
                                   module init code */
    PyObject *deepcopy_obj;     /* callable invoked as (object, memo) by
                                   deepcopy() for the general case */
    PyObject *elementpath_obj;  /* NOTE(review): presumably the ElementPath
                                   helper module - confirm at the module
                                   init code */
} elementtreestate;
89
/* Forward declaration; ET_STATE_GLOBAL below needs the address of the
 * module definition. */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300134 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 return result;
144}
145
Eli Bendersky48d358b2012-05-30 17:57:50 +0300146/* Is the given object an empty dictionary?
147*/
148static int
149is_empty_dict(PyObject *obj)
150{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200151 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300152}
153
154
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200156/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157
/* Optional part of an element.  It is allocated by create_extra() and
   released by dealloc_extra(); elements without one have extra == NULL. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used until more than STATIC_CHILDREN slots are
       needed (see element_resize) */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
173
/* Instance layout of an Element. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra() has run */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198
/* True only when op's type is exactly Element_Type; instances of
   subclasses do not match. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200202/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203
204LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200205create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206{
207 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200208 if (!self->extra) {
209 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200211 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213 if (!attrib)
214 attrib = Py_None;
215
216 Py_INCREF(attrib);
217 self->extra->attrib = attrib;
218
219 self->extra->length = 0;
220 self->extra->allocated = STATIC_CHILDREN;
221 self->extra->children = self->extra->_children;
222
223 return 0;
224}
225
226LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200227dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000228{
Eli Bendersky08b85292012-04-04 15:55:07 +0300229 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200230 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300231
Eli Benderskyebf37a22012-04-03 22:02:37 +0300232 if (!self->extra)
233 return;
234
235 /* Avoid DECREFs calling into this code again (cycles, etc.)
236 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300237 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300238 self->extra = NULL;
239
240 Py_DECREF(myextra->attrib);
241
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 for (i = 0; i < myextra->length; i++)
243 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
Eli Benderskyebf37a22012-04-03 22:02:37 +0300245 if (myextra->children != myextra->_children)
246 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
Eli Benderskyebf37a22012-04-03 22:02:37 +0300248 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249}
250
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251/* Convenience internal function to create new Element objects with the given
252 * tag and attributes.
253*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
257 ElementObject* self;
258
Eli Bendersky0192ba32012-03-30 16:38:33 +0300259 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260 if (self == NULL)
261 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 self->extra = NULL;
263
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 Py_INCREF(tag);
265 self->tag = tag;
266
267 Py_INCREF(Py_None);
268 self->text = Py_None;
269
270 Py_INCREF(Py_None);
271 self->tail = Py_None;
272
Eli Benderskyebf37a22012-04-03 22:02:37 +0300273 self->weakreflist = NULL;
274
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200275 ALLOC(sizeof(ElementObject), "create element");
276 PyObject_GC_Track(self);
277
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200278 if (attrib != Py_None && !is_empty_dict(attrib)) {
279 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200280 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200281 return NULL;
282 }
283 }
284
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 return (PyObject*) self;
286}
287
Eli Bendersky092af1f2012-03-04 07:14:03 +0200288static PyObject *
289element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
290{
291 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
292 if (e != NULL) {
293 Py_INCREF(Py_None);
294 e->tag = Py_None;
295
296 Py_INCREF(Py_None);
297 e->text = Py_None;
298
299 Py_INCREF(Py_None);
300 e->tail = Py_None;
301
302 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300303 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200304 }
305 return (PyObject *)e;
306}
307
Eli Bendersky737b1732012-05-29 06:02:56 +0300308/* Helper function for extracting the attrib dictionary from a keywords dict.
309 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800310 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300311 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700312 *
313 * Return a dictionary with the content of kwds merged into the content of
314 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300315 */
316static PyObject*
317get_attrib_from_keywords(PyObject *kwds)
318{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700319 PyObject *attrib_str = PyUnicode_FromString("attrib");
320 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300321
322 if (attrib) {
323 /* If attrib was found in kwds, copy its value and remove it from
324 * kwds
325 */
326 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700327 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300328 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
329 Py_TYPE(attrib)->tp_name);
330 return NULL;
331 }
332 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700333 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 } else {
335 attrib = PyDict_New();
336 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700337
338 Py_DECREF(attrib_str);
339
340 /* attrib can be NULL if PyDict_New failed */
341 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200342 if (PyDict_Update(attrib, kwds) < 0)
343 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300344 return attrib;
345}
346
Serhiy Storchakacb985562015-05-04 15:32:48 +0300347/*[clinic input]
348module _elementtree
349class _elementtree.Element "ElementObject *" "&Element_Type"
350class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
351class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
352[clinic start generated code]*/
353/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
354
Eli Bendersky092af1f2012-03-04 07:14:03 +0200355static int
356element_init(PyObject *self, PyObject *args, PyObject *kwds)
357{
358 PyObject *tag;
359 PyObject *tmp;
360 PyObject *attrib = NULL;
361 ElementObject *self_elem;
362
363 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
364 return -1;
365
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 if (attrib) {
367 /* attrib passed as positional arg */
368 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200369 if (!attrib)
370 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300371 if (kwds) {
372 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200373 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 return -1;
375 }
376 }
377 } else if (kwds) {
378 /* have keywords args */
379 attrib = get_attrib_from_keywords(kwds);
380 if (!attrib)
381 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200382 }
383
384 self_elem = (ElementObject *)self;
385
Antoine Pitrouc1948842012-10-01 23:40:37 +0200386 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 return -1;
390 }
391 }
392
Eli Bendersky48d358b2012-05-30 17:57:50 +0300393 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395
396 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300398 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399
400 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_DECREF(JOIN_OBJ(tmp));
404
405 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_DECREF(JOIN_OBJ(tmp));
409
410 return 0;
411}
412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000413LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200414element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417 PyObject* *children;
418
419 /* make sure self->children can hold the given number of extra
420 elements. set an exception and return -1 if allocation failed */
421
Victor Stinner5f0af232013-07-11 23:01:36 +0200422 if (!self->extra) {
423 if (create_extra(self, NULL) < 0)
424 return -1;
425 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000426
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200427 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
429 if (size > self->extra->allocated) {
430 /* use Python 2.4's list growth strategy */
431 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100433 * which needs at least 4 bytes.
434 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 * be safe.
436 */
437 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200438 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
439 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000441 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100442 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 * false alarm always assume at least one child to be safe.
444 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 children = PyObject_Realloc(self->extra->children,
446 size * sizeof(PyObject*));
447 if (!children)
448 goto nomemory;
449 } else {
450 children = PyObject_Malloc(size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 /* copy existing children from static area to malloc buffer */
454 memcpy(children, self->extra->children,
455 self->extra->length * sizeof(PyObject*));
456 }
457 self->extra->children = children;
458 self->extra->allocated = size;
459 }
460
461 return 0;
462
463 nomemory:
464 PyErr_NoMemory();
465 return -1;
466}
467
468LOCAL(int)
469element_add_subelement(ElementObject* self, PyObject* element)
470{
471 /* add a child element to a parent */
472
473 if (element_resize(self, 1) < 0)
474 return -1;
475
476 Py_INCREF(element);
477 self->extra->children[self->extra->length] = element;
478
479 self->extra->length++;
480
481 return 0;
482}
483
484LOCAL(PyObject*)
485element_get_attrib(ElementObject* self)
486{
487 /* return borrowed reference to attrib dictionary */
488 /* note: this function assumes that the extra section exists */
489
490 PyObject* res = self->extra->attrib;
491
492 if (res == Py_None) {
493 /* create missing dictionary */
494 res = PyDict_New();
495 if (!res)
496 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200497 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000498 self->extra->attrib = res;
499 }
500
501 return res;
502}
503
504LOCAL(PyObject*)
505element_get_text(ElementObject* self)
506{
507 /* return borrowed reference to text attribute */
508
Serhiy Storchaka576def02017-03-30 09:47:31 +0300509 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000510
511 if (JOIN_GET(res)) {
512 res = JOIN_OBJ(res);
513 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300514 PyObject *tmp = list_join(res);
515 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000516 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300517 self->text = tmp;
518 Py_DECREF(res);
519 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
Serhiy Storchaka576def02017-03-30 09:47:31 +0300531 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300536 PyObject *tmp = list_join(res);
537 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300539 self->tail = tmp;
540 Py_DECREF(res);
541 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
/* Drop every reference owned by the element: tag, text, tail and the whole
 * extra block.  Afterwards all of those members are NULL.  Always
 * returns 0.
 */
static int
element_gc_clear(ElementObject *self)
{
    Py_CLEAR(self->tag);
    /* text and tail may carry a join flag, so plain Py_CLEAR cannot be
     * used on them */
    _clear_joined_ptr(&self->text);
    _clear_joined_ptr(&self->tail);

    /* After dropping all references from extra, it's no longer valid anyway,
     * so fully deallocate it.
    */
    dealloc_extra(self);
    return 0;
}
626
/* Destructor for Element objects (presumably installed as tp_dealloc; the
 * type definition is outside this view).  Clears weak references, drops
 * every owned reference and hands the memory back through the tp_free
 * slot of the object's type.  The work between the two Py_TRASHCAN
 * macros must stay bracketed by them.
 */
static void
element_dealloc(ElementObject* self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_SAFE_BEGIN(self)

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    RELEASE(sizeof(ElementObject), "destroy element");
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_SAFE_END(self)
}
645
646/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647
Serhiy Storchakacb985562015-05-04 15:32:48 +0300648/*[clinic input]
649_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000650
Serhiy Storchakacb985562015-05-04 15:32:48 +0300651 subelement: object(subclass_of='&Element_Type')
652 /
653
654[clinic start generated code]*/
655
656static PyObject *
657_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
658/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
659{
660 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000661 return NULL;
662
663 Py_RETURN_NONE;
664}
665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666/*[clinic input]
667_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669[clinic start generated code]*/
670
671static PyObject *
672_elementtree_Element_clear_impl(ElementObject *self)
673/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
674{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300675 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000676
677 Py_INCREF(Py_None);
678 Py_DECREF(JOIN_OBJ(self->text));
679 self->text = Py_None;
680
681 Py_INCREF(Py_None);
682 Py_DECREF(JOIN_OBJ(self->tail));
683 self->tail = Py_None;
684
685 Py_RETURN_NONE;
686}
687
Serhiy Storchakacb985562015-05-04 15:32:48 +0300688/*[clinic input]
689_elementtree.Element.__copy__
690
691[clinic start generated code]*/
692
693static PyObject *
694_elementtree_Element___copy___impl(ElementObject *self)
695/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000696{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200697 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000698 ElementObject* element;
699
Eli Bendersky092af1f2012-03-04 07:14:03 +0200700 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800701 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000702 if (!element)
703 return NULL;
704
705 Py_DECREF(JOIN_OBJ(element->text));
706 element->text = self->text;
707 Py_INCREF(JOIN_OBJ(element->text));
708
709 Py_DECREF(JOIN_OBJ(element->tail));
710 element->tail = self->tail;
711 Py_INCREF(JOIN_OBJ(element->tail));
712
713 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 if (element_resize(element, self->extra->length) < 0) {
715 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000716 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000717 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000718
719 for (i = 0; i < self->extra->length; i++) {
720 Py_INCREF(self->extra->children[i]);
721 element->extra->children[i] = self->extra->children[i];
722 }
723
724 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 }
726
727 return (PyObject*) element;
728}
729
/* Helper for a deep copy.  Declared ahead of its definition because
   Element.__deepcopy__ and this helper call each other. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
732
Serhiy Storchakacb985562015-05-04 15:32:48 +0300733/*[clinic input]
734_elementtree.Element.__deepcopy__
735
736 memo: object
737 /
738
739[clinic start generated code]*/
740
741static PyObject *
742_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
743/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000744{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200745 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 ElementObject* element;
747 PyObject* tag;
748 PyObject* attrib;
749 PyObject* text;
750 PyObject* tail;
751 PyObject* id;
752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000753 tag = deepcopy(self->tag, memo);
754 if (!tag)
755 return NULL;
756
757 if (self->extra) {
758 attrib = deepcopy(self->extra->attrib, memo);
759 if (!attrib) {
760 Py_DECREF(tag);
761 return NULL;
762 }
763 } else {
764 Py_INCREF(Py_None);
765 attrib = Py_None;
766 }
767
Eli Bendersky092af1f2012-03-04 07:14:03 +0200768 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000769
770 Py_DECREF(tag);
771 Py_DECREF(attrib);
772
773 if (!element)
774 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100775
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000776 text = deepcopy(JOIN_OBJ(self->text), memo);
777 if (!text)
778 goto error;
779 Py_DECREF(element->text);
780 element->text = JOIN_SET(text, JOIN_GET(self->text));
781
782 tail = deepcopy(JOIN_OBJ(self->tail), memo);
783 if (!tail)
784 goto error;
785 Py_DECREF(element->tail);
786 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
787
788 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789 if (element_resize(element, self->extra->length) < 0)
790 goto error;
791
792 for (i = 0; i < self->extra->length; i++) {
793 PyObject* child = deepcopy(self->extra->children[i], memo);
794 if (!child) {
795 element->extra->length = i;
796 goto error;
797 }
798 element->extra->children[i] = child;
799 }
800
801 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000802 }
803
804 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700805 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000806 if (!id)
807 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000808
809 i = PyDict_SetItem(memo, id, (PyObject*) element);
810
811 Py_DECREF(id);
812
813 if (i < 0)
814 goto error;
815
816 return (PyObject*) element;
817
818 error:
819 Py_DECREF(element);
820 return NULL;
821}
822
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200823LOCAL(PyObject *)
824deepcopy(PyObject *object, PyObject *memo)
825{
826 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200827 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200828 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200829
830 /* Fast paths */
831 if (object == Py_None || PyUnicode_CheckExact(object)) {
832 Py_INCREF(object);
833 return object;
834 }
835
836 if (Py_REFCNT(object) == 1) {
837 if (PyDict_CheckExact(object)) {
838 PyObject *key, *value;
839 Py_ssize_t pos = 0;
840 int simple = 1;
841 while (PyDict_Next(object, &pos, &key, &value)) {
842 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
843 simple = 0;
844 break;
845 }
846 }
847 if (simple)
848 return PyDict_Copy(object);
849 /* Fall through to general case */
850 }
851 else if (Element_CheckExact(object)) {
852 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
853 }
854 }
855
856 /* General case */
857 st = ET_STATE_GLOBAL;
858 if (!st->deepcopy_obj) {
859 PyErr_SetString(PyExc_RuntimeError,
860 "deepcopy helper not found");
861 return NULL;
862 }
863
Victor Stinner7fbac452016-08-20 01:34:44 +0200864 stack[0] = object;
865 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200866 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200867}
868
869
Serhiy Storchakacb985562015-05-04 15:32:48 +0300870/*[clinic input]
871_elementtree.Element.__sizeof__ -> Py_ssize_t
872
873[clinic start generated code]*/
874
875static Py_ssize_t
876_elementtree_Element___sizeof___impl(ElementObject *self)
877/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200878{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200879 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200880 if (self->extra) {
881 result += sizeof(ElementObjectExtra);
882 if (self->extra->children != self->extra->_children)
883 result += sizeof(PyObject*) * self->extra->allocated;
884 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300885 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200886}
887
/* Dict keys for getstate/setstate: the names under which an element's
 * fields appear in the state dictionary built by __getstate__ and parsed
 * (as keyword names) by __setstate__. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
894
895/* __getstate__ returns a fabricated instance dict as in the pure-Python
896 * Element implementation, for interoperability/interchangeability. This
897 * makes the pure-Python implementation details an API, but (a) there aren't
898 * any unnecessary structures there; and (b) it buys compatibility with 3.2
899 * pickles. See issue #16076.
900 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300901/*[clinic input]
902_elementtree.Element.__getstate__
903
904[clinic start generated code]*/
905
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300907_elementtree_Element___getstate___impl(ElementObject *self)
908/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800909{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200910 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800911 PyObject *instancedict = NULL, *children;
912
913 /* Build a list of children. */
914 children = PyList_New(self->extra ? self->extra->length : 0);
915 if (!children)
916 return NULL;
917 for (i = 0; i < PyList_GET_SIZE(children); i++) {
918 PyObject *child = self->extra->children[i];
919 Py_INCREF(child);
920 PyList_SET_ITEM(children, i, child);
921 }
922
923 /* Construct the state object. */
924 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
925 if (noattrib)
926 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
927 PICKLED_TAG, self->tag,
928 PICKLED_CHILDREN, children,
929 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700930 PICKLED_TEXT, JOIN_OBJ(self->text),
931 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800932 else
933 instancedict = Py_BuildValue("{sOsOsOsOsO}",
934 PICKLED_TAG, self->tag,
935 PICKLED_CHILDREN, children,
936 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700937 PICKLED_TEXT, JOIN_OBJ(self->text),
938 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800939 if (instancedict) {
940 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800941 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800942 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800943 else {
944 for (i = 0; i < PyList_GET_SIZE(children); i++)
945 Py_DECREF(PyList_GET_ITEM(children, i));
946 Py_DECREF(children);
947
948 return NULL;
949 }
950}
951
952static PyObject *
953element_setstate_from_attributes(ElementObject *self,
954 PyObject *tag,
955 PyObject *attrib,
956 PyObject *text,
957 PyObject *tail,
958 PyObject *children)
959{
960 Py_ssize_t i, nchildren;
961
962 if (!tag) {
963 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
964 return NULL;
965 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200967 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300968 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800969
Eli Benderskydd3661e2013-09-13 06:24:25 -0700970 _clear_joined_ptr(&self->text);
971 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
972 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800973
Eli Benderskydd3661e2013-09-13 06:24:25 -0700974 _clear_joined_ptr(&self->tail);
975 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
976 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800977
978 /* Handle ATTRIB and CHILDREN. */
979 if (!children && !attrib)
980 Py_RETURN_NONE;
981
982 /* Compute 'nchildren'. */
983 if (children) {
984 if (!PyList_Check(children)) {
985 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
986 return NULL;
987 }
988 nchildren = PyList_Size(children);
989 }
990 else {
991 nchildren = 0;
992 }
993
994 /* Allocate 'extra'. */
995 if (element_resize(self, nchildren)) {
996 return NULL;
997 }
998 assert(self->extra && self->extra->allocated >= nchildren);
999
1000 /* Copy children */
1001 for (i = 0; i < nchildren; i++) {
1002 self->extra->children[i] = PyList_GET_ITEM(children, i);
1003 Py_INCREF(self->extra->children[i]);
1004 }
1005
1006 self->extra->length = nchildren;
1007 self->extra->allocated = nchildren;
1008
1009 /* Stash attrib. */
1010 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001011 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001012 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013 }
1014
1015 Py_RETURN_NONE;
1016}
1017
1018/* __setstate__ for Element instance from the Python implementation.
1019 * 'state' should be the instance dict.
1020 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001021
Eli Bendersky698bdb22013-01-10 06:01:06 -08001022static PyObject *
1023element_setstate_from_Python(ElementObject *self, PyObject *state)
1024{
1025 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1026 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1027 PyObject *args;
1028 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001029 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031 tag = attrib = text = tail = children = NULL;
1032 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001033 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001034 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001035
1036 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1037 &attrib, &text, &tail, &children))
1038 retval = element_setstate_from_attributes(self, tag, attrib, text,
1039 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001040 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001041 retval = NULL;
1042
1043 Py_DECREF(args);
1044 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001045}
1046
Serhiy Storchakacb985562015-05-04 15:32:48 +03001047/*[clinic input]
1048_elementtree.Element.__setstate__
1049
1050 state: object
1051 /
1052
1053[clinic start generated code]*/
1054
Eli Bendersky698bdb22013-01-10 06:01:06 -08001055static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001056_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1057/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001058{
1059 if (!PyDict_CheckExact(state)) {
1060 PyErr_Format(PyExc_TypeError,
1061 "Don't know how to unpickle \"%.200R\" as an Element",
1062 state);
1063 return NULL;
1064 }
1065 else
1066 return element_setstate_from_Python(self, state);
1067}
1068
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069LOCAL(int)
1070checkpath(PyObject* tag)
1071{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001072 Py_ssize_t i;
1073 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001074
1075 /* check if a tag contains an xpath character */
1076
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001077#define PATHCHAR(ch) \
1078 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001080 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001081 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1082 void *data = PyUnicode_DATA(tag);
1083 unsigned int kind = PyUnicode_KIND(tag);
1084 for (i = 0; i < len; i++) {
1085 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1086 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001087 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001088 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001089 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001090 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001091 return 1;
1092 }
1093 return 0;
1094 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001095 if (PyBytes_Check(tag)) {
1096 char *p = PyBytes_AS_STRING(tag);
1097 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001098 if (p[i] == '{')
1099 check = 0;
1100 else if (p[i] == '}')
1101 check = 1;
1102 else if (check && PATHCHAR(p[i]))
1103 return 1;
1104 }
1105 return 0;
1106 }
1107
1108 return 1; /* unknown type; might be path expression */
1109}
1110
Serhiy Storchakacb985562015-05-04 15:32:48 +03001111/*[clinic input]
1112_elementtree.Element.extend
1113
1114 elements: object
1115 /
1116
1117[clinic start generated code]*/
1118
1119static PyObject *
1120_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1121/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122{
1123 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001124 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125
Serhiy Storchakacb985562015-05-04 15:32:48 +03001126 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 if (!seq) {
1128 PyErr_Format(
1129 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001130 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001131 );
1132 return NULL;
1133 }
1134
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001135 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001136 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001137 Py_INCREF(element);
1138 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001139 PyErr_Format(
1140 PyExc_TypeError,
1141 "expected an Element, not \"%.200s\"",
1142 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001143 Py_DECREF(seq);
1144 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001145 return NULL;
1146 }
1147
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 if (element_add_subelement(self, element) < 0) {
1149 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001150 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 return NULL;
1152 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001153 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001154 }
1155
1156 Py_DECREF(seq);
1157
1158 Py_RETURN_NONE;
1159}
1160
Serhiy Storchakacb985562015-05-04 15:32:48 +03001161/*[clinic input]
1162_elementtree.Element.find
1163
1164 path: object
1165 namespaces: object = None
1166
1167[clinic start generated code]*/
1168
1169static PyObject *
1170_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1171 PyObject *namespaces)
1172/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001173{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001174 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001175 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001178 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001179 return _PyObject_CallMethodIdObjArgs(
1180 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001181 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001182 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001183
1184 if (!self->extra)
1185 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001186
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001187 for (i = 0; i < self->extra->length; i++) {
1188 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001189 int rc;
1190 if (!Element_CheckExact(item))
1191 continue;
1192 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001193 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001194 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001195 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001196 Py_DECREF(item);
1197 if (rc < 0)
1198 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199 }
1200
1201 Py_RETURN_NONE;
1202}
1203
Serhiy Storchakacb985562015-05-04 15:32:48 +03001204/*[clinic input]
1205_elementtree.Element.findtext
1206
1207 path: object
1208 default: object = None
1209 namespaces: object = None
1210
1211[clinic start generated code]*/
1212
1213static PyObject *
1214_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1215 PyObject *default_value,
1216 PyObject *namespaces)
1217/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001218{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001219 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001220 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001221 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001222
Serhiy Storchakacb985562015-05-04 15:32:48 +03001223 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001224 return _PyObject_CallMethodIdObjArgs(
1225 st->elementpath_obj, &PyId_findtext,
1226 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227 );
1228
1229 if (!self->extra) {
1230 Py_INCREF(default_value);
1231 return default_value;
1232 }
1233
1234 for (i = 0; i < self->extra->length; i++) {
1235 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001236 int rc;
1237 if (!Element_CheckExact(item))
1238 continue;
1239 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001240 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 if (text == Py_None) {
1244 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001245 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001247 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001248 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 return text;
1250 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 Py_DECREF(item);
1252 if (rc < 0)
1253 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 }
1255
1256 Py_INCREF(default_value);
1257 return default_value;
1258}
1259
Serhiy Storchakacb985562015-05-04 15:32:48 +03001260/*[clinic input]
1261_elementtree.Element.findall
1262
1263 path: object
1264 namespaces: object = None
1265
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1270 PyObject *namespaces)
1271/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001273 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001275 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001276 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001279 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001280 return _PyObject_CallMethodIdObjArgs(
1281 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001283 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284
1285 out = PyList_New(0);
1286 if (!out)
1287 return NULL;
1288
1289 if (!self->extra)
1290 return out;
1291
1292 for (i = 0; i < self->extra->length; i++) {
1293 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001294 int rc;
1295 if (!Element_CheckExact(item))
1296 continue;
1297 Py_INCREF(item);
1298 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1299 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1300 Py_DECREF(item);
1301 Py_DECREF(out);
1302 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001304 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 }
1306
1307 return out;
1308}
1309
Serhiy Storchakacb985562015-05-04 15:32:48 +03001310/*[clinic input]
1311_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001312
Serhiy Storchakacb985562015-05-04 15:32:48 +03001313 path: object
1314 namespaces: object = None
1315
1316[clinic start generated code]*/
1317
1318static PyObject *
1319_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1320 PyObject *namespaces)
1321/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1322{
1323 PyObject* tag = path;
1324 _Py_IDENTIFIER(iterfind);
1325 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326
Victor Stinnerf5616342016-12-09 15:26:00 +01001327 return _PyObject_CallMethodIdObjArgs(
1328 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001329}
1330
Serhiy Storchakacb985562015-05-04 15:32:48 +03001331/*[clinic input]
1332_elementtree.Element.get
1333
1334 key: object
1335 default: object = None
1336
1337[clinic start generated code]*/
1338
1339static PyObject *
1340_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1341 PyObject *default_value)
1342/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343{
1344 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001345
1346 if (!self->extra || self->extra->attrib == Py_None)
1347 value = default_value;
1348 else {
1349 value = PyDict_GetItem(self->extra->attrib, key);
1350 if (!value)
1351 value = default_value;
1352 }
1353
1354 Py_INCREF(value);
1355 return value;
1356}
1357
Serhiy Storchakacb985562015-05-04 15:32:48 +03001358/*[clinic input]
1359_elementtree.Element.getchildren
1360
1361[clinic start generated code]*/
1362
1363static PyObject *
1364_elementtree_Element_getchildren_impl(ElementObject *self)
1365/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001367 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001368 PyObject* list;
1369
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001370 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1371 "This method will be removed in future versions. "
1372 "Use 'list(elem)' or iteration over elem instead.",
1373 1) < 0) {
1374 return NULL;
1375 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001376
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001377 if (!self->extra)
1378 return PyList_New(0);
1379
1380 list = PyList_New(self->extra->length);
1381 if (!list)
1382 return NULL;
1383
1384 for (i = 0; i < self->extra->length; i++) {
1385 PyObject* item = self->extra->children[i];
1386 Py_INCREF(item);
1387 PyList_SET_ITEM(list, i, item);
1388 }
1389
1390 return list;
1391}
1392
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001393
Eli Bendersky64d11e62012-06-15 07:42:50 +03001394static PyObject *
1395create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1396
1397
Serhiy Storchakacb985562015-05-04 15:32:48 +03001398/*[clinic input]
1399_elementtree.Element.iter
1400
1401 tag: object = None
1402
1403[clinic start generated code]*/
1404
Eli Bendersky64d11e62012-06-15 07:42:50 +03001405static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001406_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1407/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001408{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001409 if (PyUnicode_Check(tag)) {
1410 if (PyUnicode_READY(tag) < 0)
1411 return NULL;
1412 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1413 tag = Py_None;
1414 }
1415 else if (PyBytes_Check(tag)) {
1416 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1417 tag = Py_None;
1418 }
1419
Eli Bendersky64d11e62012-06-15 07:42:50 +03001420 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001421}
1422
1423
Serhiy Storchakacb985562015-05-04 15:32:48 +03001424/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001425_elementtree.Element.getiterator
1426
1427 tag: object = None
1428
1429[clinic start generated code]*/
1430
1431static PyObject *
1432_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1433/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1434{
1435 /* Change for a DeprecationWarning in 1.4 */
1436 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1437 "This method will be removed in future versions. "
1438 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1439 1) < 0) {
1440 return NULL;
1441 }
1442 return _elementtree_Element_iter_impl(self, tag);
1443}
1444
1445
1446/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001447_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001448
Serhiy Storchakacb985562015-05-04 15:32:48 +03001449[clinic start generated code]*/
1450
1451static PyObject *
1452_elementtree_Element_itertext_impl(ElementObject *self)
1453/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1454{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001455 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456}
1457
Eli Bendersky64d11e62012-06-15 07:42:50 +03001458
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001460element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001461{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001462 ElementObject* self = (ElementObject*) self_;
1463
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464 if (!self->extra || index < 0 || index >= self->extra->length) {
1465 PyErr_SetString(
1466 PyExc_IndexError,
1467 "child index out of range"
1468 );
1469 return NULL;
1470 }
1471
1472 Py_INCREF(self->extra->children[index]);
1473 return self->extra->children[index];
1474}
1475
Serhiy Storchakacb985562015-05-04 15:32:48 +03001476/*[clinic input]
1477_elementtree.Element.insert
1478
1479 index: Py_ssize_t
1480 subelement: object(subclass_of='&Element_Type')
1481 /
1482
1483[clinic start generated code]*/
1484
1485static PyObject *
1486_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1487 PyObject *subelement)
1488/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001491
Victor Stinner5f0af232013-07-11 23:01:36 +02001492 if (!self->extra) {
1493 if (create_extra(self, NULL) < 0)
1494 return NULL;
1495 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001497 if (index < 0) {
1498 index += self->extra->length;
1499 if (index < 0)
1500 index = 0;
1501 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001502 if (index > self->extra->length)
1503 index = self->extra->length;
1504
1505 if (element_resize(self, 1) < 0)
1506 return NULL;
1507
1508 for (i = self->extra->length; i > index; i--)
1509 self->extra->children[i] = self->extra->children[i-1];
1510
Serhiy Storchakacb985562015-05-04 15:32:48 +03001511 Py_INCREF(subelement);
1512 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513
1514 self->extra->length++;
1515
1516 Py_RETURN_NONE;
1517}
1518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519/*[clinic input]
1520_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521
Serhiy Storchakacb985562015-05-04 15:32:48 +03001522[clinic start generated code]*/
1523
1524static PyObject *
1525_elementtree_Element_items_impl(ElementObject *self)
1526/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1527{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001528 if (!self->extra || self->extra->attrib == Py_None)
1529 return PyList_New(0);
1530
1531 return PyDict_Items(self->extra->attrib);
1532}
1533
Serhiy Storchakacb985562015-05-04 15:32:48 +03001534/*[clinic input]
1535_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001536
Serhiy Storchakacb985562015-05-04 15:32:48 +03001537[clinic start generated code]*/
1538
1539static PyObject *
1540_elementtree_Element_keys_impl(ElementObject *self)
1541/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1542{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543 if (!self->extra || self->extra->attrib == Py_None)
1544 return PyList_New(0);
1545
1546 return PyDict_Keys(self->extra->attrib);
1547}
1548
Martin v. Löwis18e16552006-02-15 17:27:45 +00001549static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001550element_length(ElementObject* self)
1551{
1552 if (!self->extra)
1553 return 0;
1554
1555 return self->extra->length;
1556}
1557
Serhiy Storchakacb985562015-05-04 15:32:48 +03001558/*[clinic input]
1559_elementtree.Element.makeelement
1560
1561 tag: object
1562 attrib: object
1563 /
1564
1565[clinic start generated code]*/
1566
1567static PyObject *
1568_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1569 PyObject *attrib)
1570/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571{
1572 PyObject* elem;
1573
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 attrib = PyDict_Copy(attrib);
1575 if (!attrib)
1576 return NULL;
1577
Eli Bendersky092af1f2012-03-04 07:14:03 +02001578 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579
1580 Py_DECREF(attrib);
1581
1582 return elem;
1583}
1584
Serhiy Storchakacb985562015-05-04 15:32:48 +03001585/*[clinic input]
1586_elementtree.Element.remove
1587
1588 subelement: object(subclass_of='&Element_Type')
1589 /
1590
1591[clinic start generated code]*/
1592
1593static PyObject *
1594_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1595/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001596{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001597 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001598 int rc;
1599 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601 if (!self->extra) {
1602 /* element has no children, so raise exception */
1603 PyErr_SetString(
1604 PyExc_ValueError,
1605 "list.remove(x): x not in list"
1606 );
1607 return NULL;
1608 }
1609
1610 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001611 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001612 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001613 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001614 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001616 if (rc < 0)
1617 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618 }
1619
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001620 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001621 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001622 PyErr_SetString(
1623 PyExc_ValueError,
1624 "list.remove(x): x not in list"
1625 );
1626 return NULL;
1627 }
1628
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001629 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630
1631 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632 for (; i < self->extra->length; i++)
1633 self->extra->children[i] = self->extra->children[i+1];
1634
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001635 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636 Py_RETURN_NONE;
1637}
1638
1639static PyObject*
1640element_repr(ElementObject* self)
1641{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001642 int status;
1643
1644 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001645 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001646
1647 status = Py_ReprEnter((PyObject *)self);
1648 if (status == 0) {
1649 PyObject *res;
1650 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1651 Py_ReprLeave((PyObject *)self);
1652 return res;
1653 }
1654 if (status > 0)
1655 PyErr_Format(PyExc_RuntimeError,
1656 "reentrant call inside %s.__repr__",
1657 Py_TYPE(self)->tp_name);
1658 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001659}
1660
Serhiy Storchakacb985562015-05-04 15:32:48 +03001661/*[clinic input]
1662_elementtree.Element.set
1663
1664 key: object
1665 value: object
1666 /
1667
1668[clinic start generated code]*/
1669
1670static PyObject *
1671_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1672 PyObject *value)
1673/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001674{
1675 PyObject* attrib;
1676
Victor Stinner5f0af232013-07-11 23:01:36 +02001677 if (!self->extra) {
1678 if (create_extra(self, NULL) < 0)
1679 return NULL;
1680 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001681
1682 attrib = element_get_attrib(self);
1683 if (!attrib)
1684 return NULL;
1685
1686 if (PyDict_SetItem(attrib, key, value) < 0)
1687 return NULL;
1688
1689 Py_RETURN_NONE;
1690}
1691
1692static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001693element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001694{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001695 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001696 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001697 PyObject* old;
1698
1699 if (!self->extra || index < 0 || index >= self->extra->length) {
1700 PyErr_SetString(
1701 PyExc_IndexError,
1702 "child assignment index out of range");
1703 return -1;
1704 }
1705
1706 old = self->extra->children[index];
1707
1708 if (item) {
1709 Py_INCREF(item);
1710 self->extra->children[index] = item;
1711 } else {
1712 self->extra->length--;
1713 for (i = index; i < self->extra->length; i++)
1714 self->extra->children[i] = self->extra->children[i+1];
1715 }
1716
1717 Py_DECREF(old);
1718
1719 return 0;
1720}
1721
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001722static PyObject*
1723element_subscr(PyObject* self_, PyObject* item)
1724{
1725 ElementObject* self = (ElementObject*) self_;
1726
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001727 if (PyIndex_Check(item)) {
1728 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001729
1730 if (i == -1 && PyErr_Occurred()) {
1731 return NULL;
1732 }
1733 if (i < 0 && self->extra)
1734 i += self->extra->length;
1735 return element_getitem(self_, i);
1736 }
1737 else if (PySlice_Check(item)) {
1738 Py_ssize_t start, stop, step, slicelen, cur, i;
1739 PyObject* list;
1740
1741 if (!self->extra)
1742 return PyList_New(0);
1743
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001744 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001745 return NULL;
1746 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001747 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1748 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001749
1750 if (slicelen <= 0)
1751 return PyList_New(0);
1752 else {
1753 list = PyList_New(slicelen);
1754 if (!list)
1755 return NULL;
1756
1757 for (cur = start, i = 0; i < slicelen;
1758 cur += step, i++) {
1759 PyObject* item = self->extra->children[cur];
1760 Py_INCREF(item);
1761 PyList_SET_ITEM(list, i, item);
1762 }
1763
1764 return list;
1765 }
1766 }
1767 else {
1768 PyErr_SetString(PyExc_TypeError,
1769 "element indices must be integers");
1770 return NULL;
1771 }
1772}
1773
1774static int
1775element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1776{
1777 ElementObject* self = (ElementObject*) self_;
1778
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001779 if (PyIndex_Check(item)) {
1780 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001781
1782 if (i == -1 && PyErr_Occurred()) {
1783 return -1;
1784 }
1785 if (i < 0 && self->extra)
1786 i += self->extra->length;
1787 return element_setitem(self_, i, value);
1788 }
1789 else if (PySlice_Check(item)) {
1790 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1791
1792 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001793 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001794
Victor Stinner5f0af232013-07-11 23:01:36 +02001795 if (!self->extra) {
1796 if (create_extra(self, NULL) < 0)
1797 return -1;
1798 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001799
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001800 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 return -1;
1802 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001803 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1804 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001805
Eli Bendersky865756a2012-03-09 13:38:15 +02001806 if (value == NULL) {
1807 /* Delete slice */
1808 size_t cur;
1809 Py_ssize_t i;
1810
1811 if (slicelen <= 0)
1812 return 0;
1813
1814 /* Since we're deleting, the direction of the range doesn't matter,
1815 * so for simplicity make it always ascending.
1816 */
1817 if (step < 0) {
1818 stop = start + 1;
1819 start = stop + step * (slicelen - 1) - 1;
1820 step = -step;
1821 }
1822
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001823 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001824
1825 /* recycle is a list that will contain all the children
1826 * scheduled for removal.
1827 */
1828 if (!(recycle = PyList_New(slicelen))) {
1829 PyErr_NoMemory();
1830 return -1;
1831 }
1832
1833 /* This loop walks over all the children that have to be deleted,
1834 * with cur pointing at them. num_moved is the amount of children
1835 * until the next deleted child that have to be "shifted down" to
1836 * occupy the deleted's places.
1837 * Note that in the ith iteration, shifting is done i+i places down
1838 * because i children were already removed.
1839 */
1840 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1841 /* Compute how many children have to be moved, clipping at the
1842 * list end.
1843 */
1844 Py_ssize_t num_moved = step - 1;
1845 if (cur + step >= (size_t)self->extra->length) {
1846 num_moved = self->extra->length - cur - 1;
1847 }
1848
1849 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1850
1851 memmove(
1852 self->extra->children + cur - i,
1853 self->extra->children + cur + 1,
1854 num_moved * sizeof(PyObject *));
1855 }
1856
1857 /* Leftover "tail" after the last removed child */
1858 cur = start + (size_t)slicelen * step;
1859 if (cur < (size_t)self->extra->length) {
1860 memmove(
1861 self->extra->children + cur - slicelen,
1862 self->extra->children + cur,
1863 (self->extra->length - cur) * sizeof(PyObject *));
1864 }
1865
1866 self->extra->length -= slicelen;
1867
1868 /* Discard the recycle list with all the deleted sub-elements */
1869 Py_XDECREF(recycle);
1870 return 0;
1871 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001872
1873 /* A new slice is actually being assigned */
1874 seq = PySequence_Fast(value, "");
1875 if (!seq) {
1876 PyErr_Format(
1877 PyExc_TypeError,
1878 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1879 );
1880 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001882 newlen = PySequence_Fast_GET_SIZE(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883
1884 if (step != 1 && newlen != slicelen)
1885 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001886 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001888 "attempt to assign sequence of size %zd "
1889 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001890 newlen, slicelen
1891 );
1892 return -1;
1893 }
1894
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001895 /* Resize before creating the recycle bin, to prevent refleaks. */
1896 if (newlen > slicelen) {
1897 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001898 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001899 return -1;
1900 }
1901 }
1902
1903 if (slicelen > 0) {
1904 /* to avoid recursive calls to this method (via decref), move
1905 old items to the recycle bin here, and get rid of them when
1906 we're done modifying the element */
1907 recycle = PyList_New(slicelen);
1908 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001909 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001910 return -1;
1911 }
1912 for (cur = start, i = 0; i < slicelen;
1913 cur += step, i++)
1914 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1915 }
1916
1917 if (newlen < slicelen) {
1918 /* delete slice */
1919 for (i = stop; i < self->extra->length; i++)
1920 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1921 } else if (newlen > slicelen) {
1922 /* insert slice */
1923 for (i = self->extra->length-1; i >= stop; i--)
1924 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1925 }
1926
1927 /* replace the slice */
1928 for (cur = start, i = 0; i < newlen;
1929 cur += step, i++) {
1930 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1931 Py_INCREF(element);
1932 self->extra->children[cur] = element;
1933 }
1934
1935 self->extra->length += newlen - slicelen;
1936
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001937 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001938
1939 /* discard the recycle bin, and everything in it */
1940 Py_XDECREF(recycle);
1941
1942 return 0;
1943 }
1944 else {
1945 PyErr_SetString(PyExc_TypeError,
1946 "element indices must be integers");
1947 return -1;
1948 }
1949}
1950
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001951static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001952element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001953{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001954 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001955 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001956 return res;
1957}
1958
Serhiy Storchakadde08152015-11-25 15:28:13 +02001959static PyObject*
1960element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001961{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001962 PyObject *res = element_get_text(self);
1963 Py_XINCREF(res);
1964 return res;
1965}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001966
Serhiy Storchakadde08152015-11-25 15:28:13 +02001967static PyObject*
1968element_tail_getter(ElementObject *self, void *closure)
1969{
1970 PyObject *res = element_get_tail(self);
1971 Py_XINCREF(res);
1972 return res;
1973}
1974
1975static PyObject*
1976element_attrib_getter(ElementObject *self, void *closure)
1977{
1978 PyObject *res;
1979 if (!self->extra) {
1980 if (create_extra(self, NULL) < 0)
1981 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001982 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001983 res = element_get_attrib(self);
1984 Py_XINCREF(res);
1985 return res;
1986}
Victor Stinner4d463432013-07-11 23:05:03 +02001987
/* macro for setter validation
 *
 * Rejects attribute deletion: when V is NULL, sets AttributeError and makes
 * the enclosing function return -1 (so it may only be used in functions
 * returning int).  The body is wrapped in do { } while (0) so that the
 * macro behaves as one statement and can be used safely in if/else.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1996
Serhiy Storchakadde08152015-11-25 15:28:13 +02001997static int
1998element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1999{
2000 _VALIDATE_ATTR_VALUE(value);
2001 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002002 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002003 return 0;
2004}
2005
2006static int
2007element_text_setter(ElementObject *self, PyObject *value, void *closure)
2008{
2009 _VALIDATE_ATTR_VALUE(value);
2010 Py_INCREF(value);
2011 Py_DECREF(JOIN_OBJ(self->text));
2012 self->text = value;
2013 return 0;
2014}
2015
2016static int
2017element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2018{
2019 _VALIDATE_ATTR_VALUE(value);
2020 Py_INCREF(value);
2021 Py_DECREF(JOIN_OBJ(self->tail));
2022 self->tail = value;
2023 return 0;
2024}
2025
2026static int
2027element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2028{
2029 _VALIDATE_ATTR_VALUE(value);
2030 if (!self->extra) {
2031 if (create_extra(self, NULL) < 0)
2032 return -1;
2033 }
2034 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002035 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002036 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002037}
2038
2039static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002040 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002041 0, /* sq_concat */
2042 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002043 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002044 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002045 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002046 0,
2047};
2048
Eli Bendersky64d11e62012-06-15 07:42:50 +03002049/******************************* Element iterator ****************************/
2050
2051/* ElementIterObject represents the iteration state over an XML element in
2052 * pre-order traversal. To keep track of which sub-element should be returned
2053 * next, a stack of parents is maintained. This is a standard stack-based
2054 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002055 * The stack is managed using a continuous array.
2056 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002057 * the current one is exhausted, and the next child to examine in that parent.
2058 */
2059typedef struct ParentLocator_t {
2060 ElementObject *parent;
2061 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002062} ParentLocator;
2063
2064typedef struct {
2065 PyObject_HEAD
2066 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002067 Py_ssize_t parent_stack_used;
2068 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002069 ElementObject *root_element;
2070 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002071 int gettext;
2072} ElementIterObject;
2073
2074
2075static void
2076elementiter_dealloc(ElementIterObject *it)
2077{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002078 Py_ssize_t i = it->parent_stack_used;
2079 it->parent_stack_used = 0;
INADA Naokia6296d32017-08-24 14:55:17 +09002080 /* bpo-31095: UnTrack is needed before calling any callbacks */
2081 PyObject_GC_UnTrack(it);
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002082 while (i--)
2083 Py_XDECREF(it->parent_stack[i].parent);
2084 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002085
2086 Py_XDECREF(it->sought_tag);
2087 Py_XDECREF(it->root_element);
2088
Eli Bendersky64d11e62012-06-15 07:42:50 +03002089 PyObject_GC_Del(it);
2090}
2091
2092static int
2093elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2094{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002095 Py_ssize_t i = it->parent_stack_used;
2096 while (i--)
2097 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098
2099 Py_VISIT(it->root_element);
2100 Py_VISIT(it->sought_tag);
2101 return 0;
2102}
2103
2104/* Helper function for elementiter_next. Add a new parent to the parent stack.
2105 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002106static int
2107parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002108{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002109 ParentLocator *item;
2110
2111 if (it->parent_stack_used >= it->parent_stack_size) {
2112 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2113 ParentLocator *parent_stack = it->parent_stack;
2114 PyMem_Resize(parent_stack, ParentLocator, new_size);
2115 if (parent_stack == NULL)
2116 return -1;
2117 it->parent_stack = parent_stack;
2118 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002120 item = it->parent_stack + it->parent_stack_used++;
2121 Py_INCREF(parent);
2122 item->parent = parent;
2123 item->child_index = 0;
2124 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002125}
2126
2127static PyObject *
2128elementiter_next(ElementIterObject *it)
2129{
2130 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002131 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002132 * A short note on gettext: this function serves both the iter() and
2133 * itertext() methods to avoid code duplication. However, there are a few
2134 * small differences in the way these iterations work. Namely:
2135 * - itertext() only yields text from nodes that have it, and continues
2136 * iterating when a node doesn't have text (so it doesn't return any
2137 * node like iter())
2138 * - itertext() also has to handle tail, after finishing with all the
2139 * children of a node.
2140 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002141 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002142 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002143 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002144
2145 while (1) {
2146 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002147 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002148 * iterator is exhausted.
2149 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002150 if (!it->parent_stack_used) {
2151 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152 PyErr_SetNone(PyExc_StopIteration);
2153 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154 }
2155
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002156 elem = it->root_element; /* steals a reference */
2157 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002158 }
2159 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002160 /* See if there are children left to traverse in the current parent. If
2161 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002162 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002163 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2164 Py_ssize_t child_index = item->child_index;
2165 ElementObjectExtra *extra;
2166 elem = item->parent;
2167 extra = elem->extra;
2168 if (!extra || child_index >= extra->length) {
2169 it->parent_stack_used--;
2170 /* Note that extra condition on it->parent_stack_used here;
2171 * this is because itertext() is supposed to only return *inner*
2172 * text, not text following the element it began iteration with.
2173 */
2174 if (it->gettext && it->parent_stack_used) {
2175 text = element_get_tail(elem);
2176 goto gettext;
2177 }
2178 Py_DECREF(elem);
2179 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002180 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002181
Serhiy Storchaka576def02017-03-30 09:47:31 +03002182 if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) {
2183 PyErr_Format(PyExc_AttributeError,
2184 "'%.100s' object has no attribute 'iter'",
2185 Py_TYPE(extra->children[child_index])->tp_name);
2186 return NULL;
2187 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002188 elem = (ElementObject *)extra->children[child_index];
2189 item->child_index++;
2190 Py_INCREF(elem);
2191 }
2192
2193 if (parent_stack_push_new(it, elem) < 0) {
2194 Py_DECREF(elem);
2195 PyErr_NoMemory();
2196 return NULL;
2197 }
2198 if (it->gettext) {
2199 text = element_get_text(elem);
2200 goto gettext;
2201 }
2202
2203 if (it->sought_tag == Py_None)
2204 return (PyObject *)elem;
2205
2206 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2207 if (rc > 0)
2208 return (PyObject *)elem;
2209
2210 Py_DECREF(elem);
2211 if (rc < 0)
2212 return NULL;
2213 continue;
2214
2215gettext:
2216 if (!text) {
2217 Py_DECREF(elem);
2218 return NULL;
2219 }
2220 if (text == Py_None) {
2221 Py_DECREF(elem);
2222 }
2223 else {
2224 Py_INCREF(text);
2225 Py_DECREF(elem);
2226 rc = PyObject_IsTrue(text);
2227 if (rc > 0)
2228 return text;
2229 Py_DECREF(text);
2230 if (rc < 0)
2231 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002232 }
2233 }
2234
2235 return NULL;
2236}
2237
2238
2239static PyTypeObject ElementIter_Type = {
2240 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002241 /* Using the module's name since the pure-Python implementation does not
2242 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002243 "_elementtree._element_iterator", /* tp_name */
2244 sizeof(ElementIterObject), /* tp_basicsize */
2245 0, /* tp_itemsize */
2246 /* methods */
2247 (destructor)elementiter_dealloc, /* tp_dealloc */
2248 0, /* tp_print */
2249 0, /* tp_getattr */
2250 0, /* tp_setattr */
2251 0, /* tp_reserved */
2252 0, /* tp_repr */
2253 0, /* tp_as_number */
2254 0, /* tp_as_sequence */
2255 0, /* tp_as_mapping */
2256 0, /* tp_hash */
2257 0, /* tp_call */
2258 0, /* tp_str */
2259 0, /* tp_getattro */
2260 0, /* tp_setattro */
2261 0, /* tp_as_buffer */
2262 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2263 0, /* tp_doc */
2264 (traverseproc)elementiter_traverse, /* tp_traverse */
2265 0, /* tp_clear */
2266 0, /* tp_richcompare */
2267 0, /* tp_weaklistoffset */
2268 PyObject_SelfIter, /* tp_iter */
2269 (iternextfunc)elementiter_next, /* tp_iternext */
2270 0, /* tp_methods */
2271 0, /* tp_members */
2272 0, /* tp_getset */
2273 0, /* tp_base */
2274 0, /* tp_dict */
2275 0, /* tp_descr_get */
2276 0, /* tp_descr_set */
2277 0, /* tp_dictoffset */
2278 0, /* tp_init */
2279 0, /* tp_alloc */
2280 0, /* tp_new */
2281};
2282
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002283#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002284
2285static PyObject *
2286create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2287{
2288 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002289
2290 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2291 if (!it)
2292 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002293
Victor Stinner4d463432013-07-11 23:05:03 +02002294 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002295 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002296 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002297 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002298 it->root_element = self;
2299
Eli Bendersky64d11e62012-06-15 07:42:50 +03002300 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002301
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002302 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002303 if (it->parent_stack == NULL) {
2304 Py_DECREF(it);
2305 PyErr_NoMemory();
2306 return NULL;
2307 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002308 it->parent_stack_used = 0;
2309 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002310
Eli Bendersky64d11e62012-06-15 07:42:50 +03002311 return (PyObject *)it;
2312}
2313
2314
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002315/* ==================================================================== */
2316/* the tree builder type */
2317
2318typedef struct {
2319 PyObject_HEAD
2320
Eli Bendersky58d548d2012-05-29 15:45:16 +03002321 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002322
Antoine Pitrouee329312012-10-04 19:53:29 +02002323 PyObject *this; /* current node */
2324 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325
Eli Bendersky58d548d2012-05-29 15:45:16 +03002326 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002327
Eli Bendersky58d548d2012-05-29 15:45:16 +03002328 PyObject *stack; /* element stack */
2329 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002330
Eli Bendersky48d358b2012-05-30 17:57:50 +03002331 PyObject *element_factory;
2332
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002333 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002334 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002335 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2336 PyObject *end_event_obj;
2337 PyObject *start_ns_event_obj;
2338 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339} TreeBuilderObject;
2340
Christian Heimes90aa7642007-12-19 02:45:37 +00002341#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002342
2343/* -------------------------------------------------------------------- */
2344/* constructor and destructor */
2345
Eli Bendersky58d548d2012-05-29 15:45:16 +03002346static PyObject *
2347treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002348{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002349 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2350 if (t != NULL) {
2351 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002354 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002356 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002357
Eli Bendersky58d548d2012-05-29 15:45:16 +03002358 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002359 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002360 t->stack = PyList_New(20);
2361 if (!t->stack) {
2362 Py_DECREF(t->this);
2363 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002364 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002365 return NULL;
2366 }
2367 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002368
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002369 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002370 t->start_event_obj = t->end_event_obj = NULL;
2371 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2372 }
2373 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374}
2375
Serhiy Storchakacb985562015-05-04 15:32:48 +03002376/*[clinic input]
2377_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002378
Serhiy Storchakacb985562015-05-04 15:32:48 +03002379 element_factory: object = NULL
2380
2381[clinic start generated code]*/
2382
2383static int
2384_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2385 PyObject *element_factory)
2386/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2387{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002388 if (element_factory) {
2389 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002390 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002391 }
2392
Eli Bendersky58d548d2012-05-29 15:45:16 +03002393 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394}
2395
Eli Bendersky48d358b2012-05-30 17:57:50 +03002396static int
2397treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2398{
2399 Py_VISIT(self->root);
2400 Py_VISIT(self->this);
2401 Py_VISIT(self->last);
2402 Py_VISIT(self->data);
2403 Py_VISIT(self->stack);
2404 Py_VISIT(self->element_factory);
2405 return 0;
2406}
2407
2408static int
2409treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002410{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002411 Py_CLEAR(self->end_ns_event_obj);
2412 Py_CLEAR(self->start_ns_event_obj);
2413 Py_CLEAR(self->end_event_obj);
2414 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002415 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002416 Py_CLEAR(self->stack);
2417 Py_CLEAR(self->data);
2418 Py_CLEAR(self->last);
2419 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002420 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002421 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002422 return 0;
2423}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424
Eli Bendersky48d358b2012-05-30 17:57:50 +03002425static void
2426treebuilder_dealloc(TreeBuilderObject *self)
2427{
2428 PyObject_GC_UnTrack(self);
2429 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002430 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002431}
2432
2433/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002434/* helpers for handling of arbitrary element-like objects */
2435
2436static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002437treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002438 PyObject **dest, _Py_Identifier *name)
2439{
2440 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002441 PyObject *tmp = JOIN_OBJ(*dest);
2442 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2443 *data = NULL;
2444 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002445 return 0;
2446 }
2447 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002448 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002449 int r;
2450 if (joined == NULL)
2451 return -1;
2452 r = _PyObject_SetAttrId(element, name, joined);
2453 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002454 if (r < 0)
2455 return -1;
2456 Py_CLEAR(*data);
2457 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002458 }
2459}
2460
Serhiy Storchaka576def02017-03-30 09:47:31 +03002461LOCAL(int)
2462treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002463{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002464 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002465
Serhiy Storchaka576def02017-03-30 09:47:31 +03002466 if (!self->data) {
2467 return 0;
2468 }
2469
2470 if (self->this == element) {
2471 _Py_IDENTIFIER(text);
2472 return treebuilder_set_element_text_or_tail(
2473 element, &self->data,
2474 &((ElementObject *) element)->text, &PyId_text);
2475 }
2476 else {
2477 _Py_IDENTIFIER(tail);
2478 return treebuilder_set_element_text_or_tail(
2479 element, &self->data,
2480 &((ElementObject *) element)->tail, &PyId_tail);
2481 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002482}
2483
2484static int
2485treebuilder_add_subelement(PyObject *element, PyObject *child)
2486{
2487 _Py_IDENTIFIER(append);
2488 if (Element_CheckExact(element)) {
2489 ElementObject *elem = (ElementObject *) element;
2490 return element_add_subelement(elem, child);
2491 }
2492 else {
2493 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002494 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002495 if (res == NULL)
2496 return -1;
2497 Py_DECREF(res);
2498 return 0;
2499 }
2500}
2501
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002502LOCAL(int)
2503treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2504 PyObject *node)
2505{
2506 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002507 PyObject *res;
2508 PyObject *event = PyTuple_Pack(2, action, node);
2509 if (event == NULL)
2510 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002511 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002512 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002513 if (res == NULL)
2514 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002515 Py_DECREF(res);
2516 }
2517 return 0;
2518}
2519
Antoine Pitrouee329312012-10-04 19:53:29 +02002520/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521/* handlers */
2522
2523LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2525 PyObject* attrib)
2526{
2527 PyObject* node;
2528 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002529 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530
Serhiy Storchaka576def02017-03-30 09:47:31 +03002531 if (treebuilder_flush_data(self) < 0) {
2532 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 }
2534
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002535 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002536 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002537 } else if (attrib == Py_None) {
2538 attrib = PyDict_New();
2539 if (!attrib)
2540 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002541 node = PyObject_CallFunctionObjArgs(self->element_factory,
2542 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002543 Py_DECREF(attrib);
2544 }
2545 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002546 node = PyObject_CallFunctionObjArgs(self->element_factory,
2547 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002548 }
2549 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002551 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552
Antoine Pitrouee329312012-10-04 19:53:29 +02002553 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554
2555 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002556 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002557 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 } else {
2559 if (self->root) {
2560 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002561 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 "multiple elements on top level"
2563 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002564 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002565 }
2566 Py_INCREF(node);
2567 self->root = node;
2568 }
2569
2570 if (self->index < PyList_GET_SIZE(self->stack)) {
2571 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002572 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 Py_INCREF(this);
2574 } else {
2575 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002576 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002577 }
2578 self->index++;
2579
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002581 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002583 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002584
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002585 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2586 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002587
2588 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002589
2590 error:
2591 Py_DECREF(node);
2592 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002593}
2594
2595LOCAL(PyObject*)
2596treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2597{
2598 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002599 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002600 /* ignore calls to data before the first call to start */
2601 Py_RETURN_NONE;
2602 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603 /* store the first item as is */
2604 Py_INCREF(data); self->data = data;
2605 } else {
2606 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002607 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2608 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002609 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 /* expat often generates single character data sections; handle
2611 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002612 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2613 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002614 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002615 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616 } else if (PyList_CheckExact(self->data)) {
2617 if (PyList_Append(self->data, data) < 0)
2618 return NULL;
2619 } else {
2620 PyObject* list = PyList_New(2);
2621 if (!list)
2622 return NULL;
2623 PyList_SET_ITEM(list, 0, self->data);
2624 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2625 self->data = list;
2626 }
2627 }
2628
2629 Py_RETURN_NONE;
2630}
2631
2632LOCAL(PyObject*)
2633treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2634{
2635 PyObject* item;
2636
Serhiy Storchaka576def02017-03-30 09:47:31 +03002637 if (treebuilder_flush_data(self) < 0) {
2638 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002639 }
2640
2641 if (self->index == 0) {
2642 PyErr_SetString(
2643 PyExc_IndexError,
2644 "pop from empty stack"
2645 );
2646 return NULL;
2647 }
2648
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002649 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002650 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002651 self->index--;
2652 self->this = PyList_GET_ITEM(self->stack, self->index);
2653 Py_INCREF(self->this);
2654 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002655
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002656 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2657 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658
2659 Py_INCREF(self->last);
2660 return (PyObject*) self->last;
2661}
2662
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663/* -------------------------------------------------------------------- */
2664/* methods (in alphabetical order) */
2665
Serhiy Storchakacb985562015-05-04 15:32:48 +03002666/*[clinic input]
2667_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668
Serhiy Storchakacb985562015-05-04 15:32:48 +03002669 data: object
2670 /
2671
2672[clinic start generated code]*/
2673
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* Thin wrapper: forwards to the same internal handler that
       expat_data_handler() calls directly for exact TreeBuilder targets. */
    return treebuilder_handle_data(self, data);
}
2680
Serhiy Storchakacb985562015-05-04 15:32:48 +03002681/*[clinic input]
2682_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683
Serhiy Storchakacb985562015-05-04 15:32:48 +03002684 tag: object
2685 /
2686
2687[clinic start generated code]*/
2688
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* Thin wrapper: forwards to the same internal handler that
       expat_end_handler() calls directly for exact TreeBuilder targets. */
    return treebuilder_handle_end(self, tag);
}
2695
2696LOCAL(PyObject*)
2697treebuilder_done(TreeBuilderObject* self)
2698{
2699 PyObject* res;
2700
2701 /* FIXME: check stack size? */
2702
2703 if (self->root)
2704 res = self->root;
2705 else
2706 res = Py_None;
2707
2708 Py_INCREF(res);
2709 return res;
2710}
2711
Serhiy Storchakacb985562015-05-04 15:32:48 +03002712/*[clinic input]
2713_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714
Serhiy Storchakacb985562015-05-04 15:32:48 +03002715[clinic start generated code]*/
2716
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* Thin wrapper: returns whatever treebuilder_done() returns (the root
       element, or None when there is none). */
    return treebuilder_done(self);
}
2723
Serhiy Storchakacb985562015-05-04 15:32:48 +03002724/*[clinic input]
2725_elementtree.TreeBuilder.start
2726
2727 tag: object
2728 attrs: object = None
2729 /
2730
2731[clinic start generated code]*/
2732
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
{
    /* Thin wrapper: forwards to the same internal handler that
       expat_start_handler() calls directly for exact TreeBuilder targets.
       `attrs` defaults to None per the clinic declaration. */
    return treebuilder_handle_start(self, tag, attrs);
}
2740
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741/* ==================================================================== */
2742/* the expat interface */
2743
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002746
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call an expat entry point through the cached dispatch table, e.g.
 * EXPAT(ParserFree)(parser).
 */
#define EXPAT(func) (expat_capi->func)

/* Allocator hooks handed to expat when a parser is created, so that expat's
 * allocations go through Python's object allocator.
 */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2755
/* Instance layout of the XMLParser type. */
typedef struct {
    PyObject_HEAD

    /* underlying expat parser handle */
    XML_Parser parser;

    /* object receiving the parse events */
    PyObject *target;
    /* dict consulted by expat_default_handler() to resolve "&name;"
       references (keyed by the name without '&' and ';') */
    PyObject *entity;

    /* dict used by makeuniversal() as a cache: raw bytes name -> decoded
       str name */
    PyObject *names;

    /* Attributes looked up on the target ("start", "data", "end", ...);
       NULL when the target does not provide the attribute. */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
2777
/* Forward declarations: expat_start_doctype_handler() below refers to both
 * functions before their definitions appear in the file.
 */
static PyObject*
_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject **args, Py_ssize_t nargs);
static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
                                    PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784/* helpers */
2785
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786LOCAL(PyObject*)
2787makeuniversal(XMLParserObject* self, const char* string)
2788{
2789 /* convert a UTF-8 tag/attribute name from the expat parser
2790 to a universal name string */
2791
Antoine Pitrouc1948842012-10-01 23:40:37 +02002792 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 PyObject* key;
2794 PyObject* value;
2795
2796 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002797 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798 if (!key)
2799 return NULL;
2800
2801 value = PyDict_GetItem(self->names, key);
2802
2803 if (value) {
2804 Py_INCREF(value);
2805 } else {
2806 /* new name. convert to universal name, and decode as
2807 necessary */
2808
2809 PyObject* tag;
2810 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002811 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812
2813 /* look for namespace separator */
2814 for (i = 0; i < size; i++)
2815 if (string[i] == '}')
2816 break;
2817 if (i != size) {
2818 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002819 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002820 if (tag == NULL) {
2821 Py_DECREF(key);
2822 return NULL;
2823 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002824 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002825 p[0] = '{';
2826 memcpy(p+1, string, size);
2827 size++;
2828 } else {
2829 /* plain name; use key as tag */
2830 Py_INCREF(key);
2831 tag = key;
2832 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002833
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002834 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002835 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002836 value = PyUnicode_DecodeUTF8(p, size, "strict");
2837 Py_DECREF(tag);
2838 if (!value) {
2839 Py_DECREF(key);
2840 return NULL;
2841 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002842
2843 /* add to names dictionary */
2844 if (PyDict_SetItem(self->names, key, value) < 0) {
2845 Py_DECREF(key);
2846 Py_DECREF(value);
2847 return NULL;
2848 }
2849 }
2850
2851 Py_DECREF(key);
2852 return value;
2853}
2854
Eli Bendersky5b77d812012-03-16 08:20:05 +02002855/* Set the ParseError exception with the given parameters.
2856 * If message is not NULL, it's used as the error string. Otherwise, the
2857 * message string is the default for the given error_code.
2858*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002859static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002860expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2861 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002862{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002863 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002864 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002865
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002866 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002867 message ? message : EXPAT(ErrorString)(error_code),
2868 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002869 if (errmsg == NULL)
2870 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002871
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002872 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002873 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002874 if (!error)
2875 return;
2876
Eli Bendersky5b77d812012-03-16 08:20:05 +02002877 /* Add code and position attributes */
2878 code = PyLong_FromLong((long)error_code);
2879 if (!code) {
2880 Py_DECREF(error);
2881 return;
2882 }
2883 if (PyObject_SetAttrString(error, "code", code) == -1) {
2884 Py_DECREF(error);
2885 Py_DECREF(code);
2886 return;
2887 }
2888 Py_DECREF(code);
2889
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002890 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002891 if (!position) {
2892 Py_DECREF(error);
2893 return;
2894 }
2895 if (PyObject_SetAttrString(error, "position", position) == -1) {
2896 Py_DECREF(error);
2897 Py_DECREF(position);
2898 return;
2899 }
2900 Py_DECREF(position);
2901
Eli Bendersky532d03e2013-08-10 08:00:39 -07002902 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903 Py_DECREF(error);
2904}
2905
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906/* -------------------------------------------------------------------- */
2907/* handlers */
2908
2909static void
2910expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2911 int data_len)
2912{
2913 PyObject* key;
2914 PyObject* value;
2915 PyObject* res;
2916
2917 if (data_len < 2 || data_in[0] != '&')
2918 return;
2919
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002920 if (PyErr_Occurred())
2921 return;
2922
Neal Norwitz0269b912007-08-08 06:56:02 +00002923 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 if (!key)
2925 return;
2926
2927 value = PyDict_GetItem(self->entity, key);
2928
2929 if (value) {
2930 if (TreeBuilder_CheckExact(self->target))
2931 res = treebuilder_handle_data(
2932 (TreeBuilderObject*) self->target, value
2933 );
2934 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002935 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 else
2937 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002939 } else if (!PyErr_Occurred()) {
2940 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002941 char message[128] = "undefined entity ";
2942 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002943 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002944 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002946 EXPAT(GetErrorColumnNumber)(self->parser),
2947 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 );
2949 }
2950
2951 Py_DECREF(key);
2952}
2953
2954static void
2955expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2956 const XML_Char **attrib_in)
2957{
2958 PyObject* res;
2959 PyObject* tag;
2960 PyObject* attrib;
2961 int ok;
2962
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002963 if (PyErr_Occurred())
2964 return;
2965
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 /* tag name */
2967 tag = makeuniversal(self, tag_in);
2968 if (!tag)
2969 return; /* parser will look for errors */
2970
2971 /* attributes */
2972 if (attrib_in[0]) {
2973 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002974 if (!attrib) {
2975 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002977 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 while (attrib_in[0] && attrib_in[1]) {
2979 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002980 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 if (!key || !value) {
2982 Py_XDECREF(value);
2983 Py_XDECREF(key);
2984 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002985 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 return;
2987 }
2988 ok = PyDict_SetItem(attrib, key, value);
2989 Py_DECREF(value);
2990 Py_DECREF(key);
2991 if (ok < 0) {
2992 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002993 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002994 return;
2995 }
2996 attrib_in += 2;
2997 }
2998 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002999 Py_INCREF(Py_None);
3000 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03003001 }
3002
3003 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004 /* shortcut */
3005 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3006 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003007 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003008 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003009 if (attrib == Py_None) {
3010 Py_DECREF(attrib);
3011 attrib = PyDict_New();
3012 if (!attrib) {
3013 Py_DECREF(tag);
3014 return;
3015 }
3016 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003017 res = PyObject_CallFunctionObjArgs(self->handle_start,
3018 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003019 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003020 res = NULL;
3021
3022 Py_DECREF(tag);
3023 Py_DECREF(attrib);
3024
3025 Py_XDECREF(res);
3026}
3027
3028static void
3029expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3030 int data_len)
3031{
3032 PyObject* data;
3033 PyObject* res;
3034
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003035 if (PyErr_Occurred())
3036 return;
3037
Neal Norwitz0269b912007-08-08 06:56:02 +00003038 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003039 if (!data)
3040 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041
3042 if (TreeBuilder_CheckExact(self->target))
3043 /* shortcut */
3044 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3045 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003046 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003047 else
3048 res = NULL;
3049
3050 Py_DECREF(data);
3051
3052 Py_XDECREF(res);
3053}
3054
3055static void
3056expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3057{
3058 PyObject* tag;
3059 PyObject* res = NULL;
3060
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003061 if (PyErr_Occurred())
3062 return;
3063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003064 if (TreeBuilder_CheckExact(self->target))
3065 /* shortcut */
3066 /* the standard tree builder doesn't look at the end tag */
3067 res = treebuilder_handle_end(
3068 (TreeBuilderObject*) self->target, Py_None
3069 );
3070 else if (self->handle_end) {
3071 tag = makeuniversal(self, tag_in);
3072 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003073 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003074 Py_DECREF(tag);
3075 }
3076 }
3077
3078 Py_XDECREF(res);
3079}
3080
3081static void
3082expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3083 const XML_Char *uri)
3084{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003085 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3086 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003087
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003088 if (PyErr_Occurred())
3089 return;
3090
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003091 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003092 return;
3093
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003094 if (!uri)
3095 uri = "";
3096 if (!prefix)
3097 prefix = "";
3098
3099 parcel = Py_BuildValue("ss", prefix, uri);
3100 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003101 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003102 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3103 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104}
3105
3106static void
3107expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3108{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003109 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3110
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003111 if (PyErr_Occurred())
3112 return;
3113
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003114 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003115 return;
3116
3117 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003118}
3119
3120static void
3121expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3122{
3123 PyObject* comment;
3124 PyObject* res;
3125
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003126 if (PyErr_Occurred())
3127 return;
3128
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003130 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003131 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003132 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3133 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003134 Py_XDECREF(res);
3135 Py_DECREF(comment);
3136 }
3137 }
3138}
3139
Eli Bendersky45839902013-01-13 05:14:47 -08003140static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003141expat_start_doctype_handler(XMLParserObject *self,
3142 const XML_Char *doctype_name,
3143 const XML_Char *sysid,
3144 const XML_Char *pubid,
3145 int has_internal_subset)
3146{
3147 PyObject *self_pyobj = (PyObject *)self;
3148 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3149 PyObject *parser_doctype = NULL;
3150 PyObject *res = NULL;
3151
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003152 if (PyErr_Occurred())
3153 return;
3154
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003155 doctype_name_obj = makeuniversal(self, doctype_name);
3156 if (!doctype_name_obj)
3157 return;
3158
3159 if (sysid) {
3160 sysid_obj = makeuniversal(self, sysid);
3161 if (!sysid_obj) {
3162 Py_DECREF(doctype_name_obj);
3163 return;
3164 }
3165 } else {
3166 Py_INCREF(Py_None);
3167 sysid_obj = Py_None;
3168 }
3169
3170 if (pubid) {
3171 pubid_obj = makeuniversal(self, pubid);
3172 if (!pubid_obj) {
3173 Py_DECREF(doctype_name_obj);
3174 Py_DECREF(sysid_obj);
3175 return;
3176 }
3177 } else {
3178 Py_INCREF(Py_None);
3179 pubid_obj = Py_None;
3180 }
3181
3182 /* If the target has a handler for doctype, call it. */
3183 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003184 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3185 doctype_name_obj, pubid_obj,
3186 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003187 Py_CLEAR(res);
3188 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003189 else {
3190 /* Now see if the parser itself has a doctype method. If yes and it's
3191 * a custom method, call it but warn about deprecation. If it's only
3192 * the vanilla XMLParser method, do nothing.
3193 */
3194 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3195 if (parser_doctype &&
3196 !(PyCFunction_Check(parser_doctype) &&
3197 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3198 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003199 (PyCFunction) _elementtree_XMLParser_doctype)) {
3200 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3201 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003202 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003203 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003204 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003205 res = PyObject_CallFunctionObjArgs(parser_doctype,
3206 doctype_name_obj, pubid_obj,
3207 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003208 Py_CLEAR(res);
3209 }
3210 }
3211
3212clear:
3213 Py_XDECREF(parser_doctype);
3214 Py_DECREF(doctype_name_obj);
3215 Py_DECREF(pubid_obj);
3216 Py_DECREF(sysid_obj);
3217}
3218
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219static void
3220expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3221 const XML_Char* data_in)
3222{
3223 PyObject* target;
3224 PyObject* data;
3225 PyObject* res;
3226
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003227 if (PyErr_Occurred())
3228 return;
3229
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003230 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003231 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3232 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003234 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3235 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236 Py_XDECREF(res);
3237 Py_DECREF(data);
3238 Py_DECREF(target);
3239 } else {
3240 Py_XDECREF(data);
3241 Py_XDECREF(target);
3242 }
3243 }
3244}
3245
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247
Eli Bendersky52467b12012-06-01 07:13:08 +03003248static PyObject *
3249xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250{
Eli Bendersky52467b12012-06-01 07:13:08 +03003251 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3252 if (self) {
3253 self->parser = NULL;
3254 self->target = self->entity = self->names = NULL;
3255 self->handle_start = self->handle_data = self->handle_end = NULL;
3256 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003257 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003258 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003259 return (PyObject *)self;
3260}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261
Serhiy Storchakacb985562015-05-04 15:32:48 +03003262/*[clinic input]
3263_elementtree.XMLParser.__init__
3264
3265 html: object = NULL
3266 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003267 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003268
3269[clinic start generated code]*/
3270
Eli Bendersky52467b12012-06-01 07:13:08 +03003271static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003272_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3273 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003274/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003275{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003276 if (html != NULL) {
3277 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3278 "The html argument of XMLParser() is deprecated",
3279 1) < 0) {
3280 return -1;
3281 }
3282 }
3283
Serhiy Storchakacb985562015-05-04 15:32:48 +03003284 self->entity = PyDict_New();
3285 if (!self->entity)
3286 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->names = PyDict_New();
3289 if (!self->names) {
3290 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003291 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003293
Serhiy Storchakacb985562015-05-04 15:32:48 +03003294 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3295 if (!self->parser) {
3296 Py_CLEAR(self->entity);
3297 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003299 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 }
3301
Eli Bendersky52467b12012-06-01 07:13:08 +03003302 if (target) {
3303 Py_INCREF(target);
3304 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003305 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003307 Py_CLEAR(self->entity);
3308 Py_CLEAR(self->names);
3309 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003310 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003312 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003313 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314
Serhiy Storchakacb985562015-05-04 15:32:48 +03003315 self->handle_start = PyObject_GetAttrString(target, "start");
3316 self->handle_data = PyObject_GetAttrString(target, "data");
3317 self->handle_end = PyObject_GetAttrString(target, "end");
3318 self->handle_comment = PyObject_GetAttrString(target, "comment");
3319 self->handle_pi = PyObject_GetAttrString(target, "pi");
3320 self->handle_close = PyObject_GetAttrString(target, "close");
3321 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322
3323 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003324
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003325 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003328 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329 (XML_StartElementHandler) expat_start_handler,
3330 (XML_EndElementHandler) expat_end_handler
3331 );
3332 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003333 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334 (XML_DefaultHandler) expat_default_handler
3335 );
3336 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003337 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 (XML_CharacterDataHandler) expat_data_handler
3339 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003340 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003341 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003342 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343 (XML_CommentHandler) expat_comment_handler
3344 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003345 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003347 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348 (XML_ProcessingInstructionHandler) expat_pi_handler
3349 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003350 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003351 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003352 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3353 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003355 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003356 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358
Eli Bendersky52467b12012-06-01 07:13:08 +03003359 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360}
3361
Eli Bendersky52467b12012-06-01 07:13:08 +03003362static int
3363xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3364{
3365 Py_VISIT(self->handle_close);
3366 Py_VISIT(self->handle_pi);
3367 Py_VISIT(self->handle_comment);
3368 Py_VISIT(self->handle_end);
3369 Py_VISIT(self->handle_data);
3370 Py_VISIT(self->handle_start);
3371
3372 Py_VISIT(self->target);
3373 Py_VISIT(self->entity);
3374 Py_VISIT(self->names);
3375
3376 return 0;
3377}
3378
3379static int
3380xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003381{
3382 EXPAT(ParserFree)(self->parser);
3383
Antoine Pitrouc1948842012-10-01 23:40:37 +02003384 Py_CLEAR(self->handle_close);
3385 Py_CLEAR(self->handle_pi);
3386 Py_CLEAR(self->handle_comment);
3387 Py_CLEAR(self->handle_end);
3388 Py_CLEAR(self->handle_data);
3389 Py_CLEAR(self->handle_start);
3390 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003391
Antoine Pitrouc1948842012-10-01 23:40:37 +02003392 Py_CLEAR(self->target);
3393 Py_CLEAR(self->entity);
3394 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003395
Eli Bendersky52467b12012-06-01 07:13:08 +03003396 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397}
3398
Eli Bendersky52467b12012-06-01 07:13:08 +03003399static void
3400xmlparser_dealloc(XMLParserObject* self)
3401{
3402 PyObject_GC_UnTrack(self);
3403 xmlparser_gc_clear(self);
3404 Py_TYPE(self)->tp_free((PyObject *)self);
3405}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003406
3407LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003408expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409{
3410 int ok;
3411
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003412 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3414
3415 if (PyErr_Occurred())
3416 return NULL;
3417
3418 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003419 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003420 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003422 EXPAT(GetErrorColumnNumber)(self->parser),
3423 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003424 );
3425 return NULL;
3426 }
3427
3428 Py_RETURN_NONE;
3429}
3430
Serhiy Storchakacb985562015-05-04 15:32:48 +03003431/*[clinic input]
3432_elementtree.XMLParser.close
3433
3434[clinic start generated code]*/
3435
3436static PyObject *
3437_elementtree_XMLParser_close_impl(XMLParserObject *self)
3438/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003439{
3440 /* end feeding data to parser */
3441
3442 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003443 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003444 if (!res)
3445 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003447 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003448 Py_DECREF(res);
3449 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003450 }
3451 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003452 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003453 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003454 }
3455 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003456 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003457 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003458}
3459
Serhiy Storchakacb985562015-05-04 15:32:48 +03003460/*[clinic input]
3461_elementtree.XMLParser.feed
3462
3463 data: object
3464 /
3465
3466[clinic start generated code]*/
3467
3468static PyObject *
3469_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3470/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471{
3472 /* feed data to parser */
3473
Serhiy Storchakacb985562015-05-04 15:32:48 +03003474 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003475 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003476 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3477 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003478 return NULL;
3479 if (data_len > INT_MAX) {
3480 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3481 return NULL;
3482 }
3483 /* Explicitly set UTF-8 encoding. Return code ignored. */
3484 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003485 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003486 }
3487 else {
3488 Py_buffer view;
3489 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003490 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003491 return NULL;
3492 if (view.len > INT_MAX) {
3493 PyBuffer_Release(&view);
3494 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3495 return NULL;
3496 }
3497 res = expat_parse(self, view.buf, (int)view.len, 0);
3498 PyBuffer_Release(&view);
3499 return res;
3500 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501}
3502
Serhiy Storchakacb985562015-05-04 15:32:48 +03003503/*[clinic input]
3504_elementtree.XMLParser._parse_whole
3505
3506 file: object
3507 /
3508
3509[clinic start generated code]*/
3510
3511static PyObject *
3512_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3513/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003514{
Eli Benderskya3699232013-05-19 18:47:23 -07003515 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003516 PyObject* reader;
3517 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003518 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003519 PyObject* res;
3520
Serhiy Storchakacb985562015-05-04 15:32:48 +03003521 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003522 if (!reader)
3523 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003524
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003525 /* read from open file object */
3526 for (;;) {
3527
3528 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3529
3530 if (!buffer) {
3531 /* read failed (e.g. due to KeyboardInterrupt) */
3532 Py_DECREF(reader);
3533 return NULL;
3534 }
3535
Eli Benderskyf996e772012-03-16 05:53:30 +02003536 if (PyUnicode_CheckExact(buffer)) {
3537 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003538 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003539 Py_DECREF(buffer);
3540 break;
3541 }
3542 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003543 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003544 if (!temp) {
3545 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003546 Py_DECREF(reader);
3547 return NULL;
3548 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003549 buffer = temp;
3550 }
3551 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 Py_DECREF(buffer);
3553 break;
3554 }
3555
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003556 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3557 Py_DECREF(buffer);
3558 Py_DECREF(reader);
3559 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3560 return NULL;
3561 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003562 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003563 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003564 );
3565
3566 Py_DECREF(buffer);
3567
3568 if (!res) {
3569 Py_DECREF(reader);
3570 return NULL;
3571 }
3572 Py_DECREF(res);
3573
3574 }
3575
3576 Py_DECREF(reader);
3577
3578 res = expat_parse(self, "", 0, 1);
3579
3580 if (res && TreeBuilder_CheckExact(self->target)) {
3581 Py_DECREF(res);
3582 return treebuilder_done((TreeBuilderObject*) self->target);
3583 }
3584
3585 return res;
3586}
3587
Serhiy Storchakacb985562015-05-04 15:32:48 +03003588/*[clinic input]
3589_elementtree.XMLParser.doctype
3590
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003591 name: object
3592 pubid: object
3593 system: object
3594 /
3595
Serhiy Storchakacb985562015-05-04 15:32:48 +03003596[clinic start generated code]*/
3597
3598static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003599_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3600 PyObject *pubid, PyObject *system)
3601/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003602{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003603 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3604 "This method of XMLParser is deprecated. Define"
3605 " doctype() method on the TreeBuilder target.",
3606 1) < 0) {
3607 return NULL;
3608 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003609 Py_RETURN_NONE;
3610}
3611
Serhiy Storchakacb985562015-05-04 15:32:48 +03003612/*[clinic input]
3613_elementtree.XMLParser._setevents
3614
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003615 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003616 events_to_report: object = None
3617 /
3618
3619[clinic start generated code]*/
3620
3621static PyObject *
3622_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3623 PyObject *events_queue,
3624 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003625/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626{
3627 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003628 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003629 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003630 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631
3632 if (!TreeBuilder_CheckExact(self->target)) {
3633 PyErr_SetString(
3634 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003635 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636 "targets"
3637 );
3638 return NULL;
3639 }
3640
3641 target = (TreeBuilderObject*) self->target;
3642
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003643 events_append = PyObject_GetAttrString(events_queue, "append");
3644 if (events_append == NULL)
3645 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003646 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003647
3648 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003649 Py_CLEAR(target->start_event_obj);
3650 Py_CLEAR(target->end_event_obj);
3651 Py_CLEAR(target->start_ns_event_obj);
3652 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003654 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003656 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657 Py_RETURN_NONE;
3658 }
3659
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003660 if (!(events_seq = PySequence_Fast(events_to_report,
3661 "events must be a sequence"))) {
3662 return NULL;
3663 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03003665 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003666 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003667 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003668 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003669 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003670 } else if (PyBytes_Check(event_name_obj)) {
3671 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003672 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003673 if (event_name == NULL) {
3674 Py_DECREF(events_seq);
3675 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3676 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003677 }
3678
3679 Py_INCREF(event_name_obj);
3680 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003681 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003682 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003683 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003684 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003685 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 EXPAT(SetNamespaceDeclHandler)(
3687 self->parser,
3688 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3689 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3690 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003691 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003692 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003693 EXPAT(SetNamespaceDeclHandler)(
3694 self->parser,
3695 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3696 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3697 );
3698 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003699 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003700 Py_DECREF(events_seq);
3701 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003702 return NULL;
3703 }
3704 }
3705
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003706 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003707 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003708}
3709
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003710static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003711xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003712{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003713 if (PyUnicode_Check(nameobj)) {
3714 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003715 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003716 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003717 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003718 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003719 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003720 return PyUnicode_FromFormat(
3721 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003722 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003723 }
3724 else
3725 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003726
Alexander Belopolskye239d232010-12-08 23:31:48 +00003727 Py_INCREF(res);
3728 return res;
3729 }
3730 generic:
3731 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003732}
3733
Serhiy Storchakacb985562015-05-04 15:32:48 +03003734#include "clinic/_elementtree.c.h"
3735
3736static PyMethodDef element_methods[] = {
3737
3738 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3739
3740 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3741 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3742
3743 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3744 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3745 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3746
3747 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3748 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3749 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3750 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3751
3752 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3753 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3754 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3755
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003756 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003757 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3758
3759 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3760 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3761
3762 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3763
3764 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3765 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3766 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3767 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3768 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3769
3770 {NULL, NULL}
3771};
3772
3773static PyMappingMethods element_as_mapping = {
3774 (lenfunc) element_length,
3775 (binaryfunc) element_subscr,
3776 (objobjargproc) element_ass_subscr,
3777};
3778
Serhiy Storchakadde08152015-11-25 15:28:13 +02003779static PyGetSetDef element_getsetlist[] = {
3780 {"tag",
3781 (getter)element_tag_getter,
3782 (setter)element_tag_setter,
3783 "A string identifying what kind of data this element represents"},
3784 {"text",
3785 (getter)element_text_getter,
3786 (setter)element_text_setter,
3787 "A string of text directly after the start tag, or None"},
3788 {"tail",
3789 (getter)element_tail_getter,
3790 (setter)element_tail_setter,
3791 "A string of text directly after the end tag, or None"},
3792 {"attrib",
3793 (getter)element_attrib_getter,
3794 (setter)element_attrib_setter,
3795 "A dictionary containing the element's attributes"},
3796 {NULL},
3797};
3798
Serhiy Storchakacb985562015-05-04 15:32:48 +03003799static PyTypeObject Element_Type = {
3800 PyVarObject_HEAD_INIT(NULL, 0)
3801 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3802 /* methods */
3803 (destructor)element_dealloc, /* tp_dealloc */
3804 0, /* tp_print */
3805 0, /* tp_getattr */
3806 0, /* tp_setattr */
3807 0, /* tp_reserved */
3808 (reprfunc)element_repr, /* tp_repr */
3809 0, /* tp_as_number */
3810 &element_as_sequence, /* tp_as_sequence */
3811 &element_as_mapping, /* tp_as_mapping */
3812 0, /* tp_hash */
3813 0, /* tp_call */
3814 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003815 PyObject_GenericGetAttr, /* tp_getattro */
3816 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003817 0, /* tp_as_buffer */
3818 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3819 /* tp_flags */
3820 0, /* tp_doc */
3821 (traverseproc)element_gc_traverse, /* tp_traverse */
3822 (inquiry)element_gc_clear, /* tp_clear */
3823 0, /* tp_richcompare */
3824 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3825 0, /* tp_iter */
3826 0, /* tp_iternext */
3827 element_methods, /* tp_methods */
3828 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003829 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003830 0, /* tp_base */
3831 0, /* tp_dict */
3832 0, /* tp_descr_get */
3833 0, /* tp_descr_set */
3834 0, /* tp_dictoffset */
3835 (initproc)element_init, /* tp_init */
3836 PyType_GenericAlloc, /* tp_alloc */
3837 element_new, /* tp_new */
3838 0, /* tp_free */
3839};
3840
3841static PyMethodDef treebuilder_methods[] = {
3842 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3843 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3844 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3845 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3846 {NULL, NULL}
3847};
3848
3849static PyTypeObject TreeBuilder_Type = {
3850 PyVarObject_HEAD_INIT(NULL, 0)
3851 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3852 /* methods */
3853 (destructor)treebuilder_dealloc, /* tp_dealloc */
3854 0, /* tp_print */
3855 0, /* tp_getattr */
3856 0, /* tp_setattr */
3857 0, /* tp_reserved */
3858 0, /* tp_repr */
3859 0, /* tp_as_number */
3860 0, /* tp_as_sequence */
3861 0, /* tp_as_mapping */
3862 0, /* tp_hash */
3863 0, /* tp_call */
3864 0, /* tp_str */
3865 0, /* tp_getattro */
3866 0, /* tp_setattro */
3867 0, /* tp_as_buffer */
3868 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3869 /* tp_flags */
3870 0, /* tp_doc */
3871 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3872 (inquiry)treebuilder_gc_clear, /* tp_clear */
3873 0, /* tp_richcompare */
3874 0, /* tp_weaklistoffset */
3875 0, /* tp_iter */
3876 0, /* tp_iternext */
3877 treebuilder_methods, /* tp_methods */
3878 0, /* tp_members */
3879 0, /* tp_getset */
3880 0, /* tp_base */
3881 0, /* tp_dict */
3882 0, /* tp_descr_get */
3883 0, /* tp_descr_set */
3884 0, /* tp_dictoffset */
3885 _elementtree_TreeBuilder___init__, /* tp_init */
3886 PyType_GenericAlloc, /* tp_alloc */
3887 treebuilder_new, /* tp_new */
3888 0, /* tp_free */
3889};
3890
3891static PyMethodDef xmlparser_methods[] = {
3892 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3893 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3894 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3895 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3896 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3897 {NULL, NULL}
3898};
3899
Neal Norwitz227b5332006-03-22 09:28:35 +00003900static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003901 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003902 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003903 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003904 (destructor)xmlparser_dealloc, /* tp_dealloc */
3905 0, /* tp_print */
3906 0, /* tp_getattr */
3907 0, /* tp_setattr */
3908 0, /* tp_reserved */
3909 0, /* tp_repr */
3910 0, /* tp_as_number */
3911 0, /* tp_as_sequence */
3912 0, /* tp_as_mapping */
3913 0, /* tp_hash */
3914 0, /* tp_call */
3915 0, /* tp_str */
3916 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3917 0, /* tp_setattro */
3918 0, /* tp_as_buffer */
3919 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3920 /* tp_flags */
3921 0, /* tp_doc */
3922 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3923 (inquiry)xmlparser_gc_clear, /* tp_clear */
3924 0, /* tp_richcompare */
3925 0, /* tp_weaklistoffset */
3926 0, /* tp_iter */
3927 0, /* tp_iternext */
3928 xmlparser_methods, /* tp_methods */
3929 0, /* tp_members */
3930 0, /* tp_getset */
3931 0, /* tp_base */
3932 0, /* tp_dict */
3933 0, /* tp_descr_get */
3934 0, /* tp_descr_set */
3935 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003936 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003937 PyType_GenericAlloc, /* tp_alloc */
3938 xmlparser_new, /* tp_new */
3939 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003940};
3941
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003942/* ==================================================================== */
3943/* python module interface */
3944
3945static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003946 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003947 {NULL, NULL}
3948};
3949
Martin v. Löwis1a214512008-06-11 05:26:20 +00003950
Eli Bendersky532d03e2013-08-10 08:00:39 -07003951static struct PyModuleDef elementtreemodule = {
3952 PyModuleDef_HEAD_INIT,
3953 "_elementtree",
3954 NULL,
3955 sizeof(elementtreestate),
3956 _functions,
3957 NULL,
3958 elementtree_traverse,
3959 elementtree_clear,
3960 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003961};
3962
Neal Norwitzf6657e62006-12-28 04:47:50 +00003963PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003964PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003965{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003966 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003967 elementtreestate *st;
3968
3969 m = PyState_FindModule(&elementtreemodule);
3970 if (m) {
3971 Py_INCREF(m);
3972 return m;
3973 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003974
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003975 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003976 if (PyType_Ready(&ElementIter_Type) < 0)
3977 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003978 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003979 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003980 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003981 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003982 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003983 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003984
Eli Bendersky532d03e2013-08-10 08:00:39 -07003985 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003986 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003987 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003988 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003989
Eli Bendersky828efde2012-04-05 05:40:58 +03003990 if (!(temp = PyImport_ImportModule("copy")))
3991 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003992 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003993 Py_XDECREF(temp);
3994
Victor Stinnerb136f112017-07-10 22:28:02 +02003995 if (st->deepcopy_obj == NULL) {
3996 return NULL;
3997 }
3998
3999 assert(!PyErr_Occurred());
Eli Bendersky532d03e2013-08-10 08:00:39 -07004000 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03004001 return NULL;
4002
Eli Bendersky20d41742012-06-01 09:48:37 +03004003 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004004 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4005 if (expat_capi) {
4006 /* check that it's usable */
4007 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004008 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004009 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4010 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004011 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004012 PyErr_SetString(PyExc_ImportError,
4013 "pyexpat version is incompatible");
4014 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004015 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004016 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004017 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004018 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004019
Eli Bendersky532d03e2013-08-10 08:00:39 -07004020 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004021 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004022 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004023 Py_INCREF(st->parseerror_obj);
4024 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004025
Eli Bendersky092af1f2012-03-04 07:14:03 +02004026 Py_INCREF((PyObject *)&Element_Type);
4027 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4028
Eli Bendersky58d548d2012-05-29 15:45:16 +03004029 Py_INCREF((PyObject *)&TreeBuilder_Type);
4030 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4031
Eli Bendersky52467b12012-06-01 07:13:08 +03004032 Py_INCREF((PyObject *)&XMLParser_Type);
4033 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004034
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004035 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004036}