blob: 456c4a2a79918f7c2dda5e818bed21a759f0f732 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change the condition to 1 to keep a
   running byte count and print it on every ALLOC/RELEASE; otherwise both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private declaration of
   the given type; with MSVC it additionally requests inlining and the
   fastcall calling convention */
#ifdef _MSC_VER
#  define LOCAL(type) static __inline type __fastcall
#else
#  define LOCAL(type) static type
#endif
55
/* The text and tail slots store a 'join' flag in the low bit of the
   string object pointer.  Every use of text or tail as an object pointer
   must therefore be wrapped in JOIN_OBJ.  See the comments in the
   ElementObject definition for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - p with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
/* Per-module state; PEP 3121.
 * Every member is an object reference that elementtree_traverse() visits
 * and elementtree_clear() releases.
 */
typedef struct {
    /* NOTE(review): presumably the ParseError exception object - its use
       is outside this part of the file, confirm */
    PyObject *parseerror_obj;
    /* callable invoked by deepcopy() for the general (slow) case */
    PyObject *deepcopy_obj;
    /* NOTE(review): presumably the ElementPath helper module - its use
       is outside this part of the file, confirm */
    PyObject *elementpath_obj;
} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
/* Per-module state of the given module object, which is assumed to be
 * _elementtree.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Per-module state of the module instance imported in the currently
 * running sub-interpreter.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) \
     PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300134 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 return result;
144}
145
Eli Bendersky48d358b2012-05-30 17:57:50 +0300146/* Is the given object an empty dictionary?
147*/
148static int
149is_empty_dict(PyObject *obj)
150{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200151 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300152}
153
154
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200156/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157
/* Optional, separately allocated part of an element: its attributes and
   its children.  It is allocated by create_extra() and released by
   dealloc_extra(). */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage for the first STATIC_CHILDREN children, used until
       element_resize() has to move them to a malloced buffer */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
173
/* Instance layout of the Element type. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child. note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer. the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent. note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; stays NULL until create_extra() has been
       called for this element */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198
/* True only if the type of `op` is exactly Element_Type; instances of
   subclasses do not match. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200202/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203
204LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200205create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206{
207 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200208 if (!self->extra) {
209 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200211 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213 if (!attrib)
214 attrib = Py_None;
215
216 Py_INCREF(attrib);
217 self->extra->attrib = attrib;
218
219 self->extra->length = 0;
220 self->extra->allocated = STATIC_CHILDREN;
221 self->extra->children = self->extra->_children;
222
223 return 0;
224}
225
226LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200227dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000228{
Eli Bendersky08b85292012-04-04 15:55:07 +0300229 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200230 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300231
Eli Benderskyebf37a22012-04-03 22:02:37 +0300232 if (!self->extra)
233 return;
234
235 /* Avoid DECREFs calling into this code again (cycles, etc.)
236 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300237 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300238 self->extra = NULL;
239
240 Py_DECREF(myextra->attrib);
241
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 for (i = 0; i < myextra->length; i++)
243 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
Eli Benderskyebf37a22012-04-03 22:02:37 +0300245 if (myextra->children != myextra->_children)
246 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
Eli Benderskyebf37a22012-04-03 22:02:37 +0300248 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249}
250
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251/* Convenience internal function to create new Element objects with the given
252 * tag and attributes.
253*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
257 ElementObject* self;
258
Eli Bendersky0192ba32012-03-30 16:38:33 +0300259 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260 if (self == NULL)
261 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 self->extra = NULL;
263
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 Py_INCREF(tag);
265 self->tag = tag;
266
267 Py_INCREF(Py_None);
268 self->text = Py_None;
269
270 Py_INCREF(Py_None);
271 self->tail = Py_None;
272
Eli Benderskyebf37a22012-04-03 22:02:37 +0300273 self->weakreflist = NULL;
274
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200275 ALLOC(sizeof(ElementObject), "create element");
276 PyObject_GC_Track(self);
277
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200278 if (attrib != Py_None && !is_empty_dict(attrib)) {
279 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200280 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200281 return NULL;
282 }
283 }
284
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 return (PyObject*) self;
286}
287
Eli Bendersky092af1f2012-03-04 07:14:03 +0200288static PyObject *
289element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
290{
291 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
292 if (e != NULL) {
293 Py_INCREF(Py_None);
294 e->tag = Py_None;
295
296 Py_INCREF(Py_None);
297 e->text = Py_None;
298
299 Py_INCREF(Py_None);
300 e->tail = Py_None;
301
302 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300303 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200304 }
305 return (PyObject *)e;
306}
307
Eli Bendersky737b1732012-05-29 06:02:56 +0300308/* Helper function for extracting the attrib dictionary from a keywords dict.
309 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800310 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300311 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700312 *
313 * Return a dictionary with the content of kwds merged into the content of
314 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300315 */
316static PyObject*
317get_attrib_from_keywords(PyObject *kwds)
318{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700319 PyObject *attrib_str = PyUnicode_FromString("attrib");
320 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300321
322 if (attrib) {
323 /* If attrib was found in kwds, copy its value and remove it from
324 * kwds
325 */
326 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700327 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300328 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
329 Py_TYPE(attrib)->tp_name);
330 return NULL;
331 }
332 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700333 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 } else {
335 attrib = PyDict_New();
336 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700337
338 Py_DECREF(attrib_str);
339
340 /* attrib can be NULL if PyDict_New failed */
341 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200342 if (PyDict_Update(attrib, kwds) < 0)
343 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300344 return attrib;
345}
346
Serhiy Storchakacb985562015-05-04 15:32:48 +0300347/*[clinic input]
348module _elementtree
349class _elementtree.Element "ElementObject *" "&Element_Type"
350class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
351class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
352[clinic start generated code]*/
353/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
354
Eli Bendersky092af1f2012-03-04 07:14:03 +0200355static int
356element_init(PyObject *self, PyObject *args, PyObject *kwds)
357{
358 PyObject *tag;
359 PyObject *tmp;
360 PyObject *attrib = NULL;
361 ElementObject *self_elem;
362
363 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
364 return -1;
365
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 if (attrib) {
367 /* attrib passed as positional arg */
368 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200369 if (!attrib)
370 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300371 if (kwds) {
372 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200373 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 return -1;
375 }
376 }
377 } else if (kwds) {
378 /* have keywords args */
379 attrib = get_attrib_from_keywords(kwds);
380 if (!attrib)
381 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200382 }
383
384 self_elem = (ElementObject *)self;
385
Antoine Pitrouc1948842012-10-01 23:40:37 +0200386 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 return -1;
390 }
391 }
392
Eli Bendersky48d358b2012-05-30 17:57:50 +0300393 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395
396 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300398 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399
400 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_DECREF(JOIN_OBJ(tmp));
404
405 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_DECREF(JOIN_OBJ(tmp));
409
410 return 0;
411}
412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000413LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200414element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417 PyObject* *children;
418
419 /* make sure self->children can hold the given number of extra
420 elements. set an exception and return -1 if allocation failed */
421
Victor Stinner5f0af232013-07-11 23:01:36 +0200422 if (!self->extra) {
423 if (create_extra(self, NULL) < 0)
424 return -1;
425 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000426
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200427 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
429 if (size > self->extra->allocated) {
430 /* use Python 2.4's list growth strategy */
431 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100433 * which needs at least 4 bytes.
434 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 * be safe.
436 */
437 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200438 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
439 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000441 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100442 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 * false alarm always assume at least one child to be safe.
444 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 children = PyObject_Realloc(self->extra->children,
446 size * sizeof(PyObject*));
447 if (!children)
448 goto nomemory;
449 } else {
450 children = PyObject_Malloc(size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 /* copy existing children from static area to malloc buffer */
454 memcpy(children, self->extra->children,
455 self->extra->length * sizeof(PyObject*));
456 }
457 self->extra->children = children;
458 self->extra->allocated = size;
459 }
460
461 return 0;
462
463 nomemory:
464 PyErr_NoMemory();
465 return -1;
466}
467
468LOCAL(int)
469element_add_subelement(ElementObject* self, PyObject* element)
470{
471 /* add a child element to a parent */
472
473 if (element_resize(self, 1) < 0)
474 return -1;
475
476 Py_INCREF(element);
477 self->extra->children[self->extra->length] = element;
478
479 self->extra->length++;
480
481 return 0;
482}
483
484LOCAL(PyObject*)
485element_get_attrib(ElementObject* self)
486{
487 /* return borrowed reference to attrib dictionary */
488 /* note: this function assumes that the extra section exists */
489
490 PyObject* res = self->extra->attrib;
491
492 if (res == Py_None) {
493 /* create missing dictionary */
494 res = PyDict_New();
495 if (!res)
496 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200497 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000498 self->extra->attrib = res;
499 }
500
501 return res;
502}
503
504LOCAL(PyObject*)
505element_get_text(ElementObject* self)
506{
507 /* return borrowed reference to text attribute */
508
Serhiy Storchaka576def02017-03-30 09:47:31 +0300509 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000510
511 if (JOIN_GET(res)) {
512 res = JOIN_OBJ(res);
513 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300514 PyObject *tmp = list_join(res);
515 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000516 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300517 self->text = tmp;
518 Py_DECREF(res);
519 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
Serhiy Storchaka576def02017-03-30 09:47:31 +0300531 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300536 PyObject *tmp = list_join(res);
537 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300539 self->tail = tmp;
540 Py_DECREF(res);
541 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
613static int
614element_gc_clear(ElementObject *self)
615{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700617 _clear_joined_ptr(&self->text);
618 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619
620 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 return 0;
625}
626
/* Deallocator of the Element type: untrack the object from the GC, clear
 * weak references, release everything the element holds and finally free
 * the object through the type's tp_free slot.
 */
static void
element_dealloc(ElementObject* self)
{
    PyObject_GC_UnTrack(self);
    /* the teardown below runs inside the trashcan BEGIN/END pair */
    Py_TRASHCAN_SAFE_BEGIN(self)

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    RELEASE(sizeof(ElementObject), "destroy element");
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_SAFE_END(self)
}
644
645/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000646
Serhiy Storchakacb985562015-05-04 15:32:48 +0300647/*[clinic input]
648_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000649
Serhiy Storchakacb985562015-05-04 15:32:48 +0300650 subelement: object(subclass_of='&Element_Type')
651 /
652
653[clinic start generated code]*/
654
655static PyObject *
656_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
657/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
658{
659 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660 return NULL;
661
662 Py_RETURN_NONE;
663}
664
Serhiy Storchakacb985562015-05-04 15:32:48 +0300665/*[clinic input]
666_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668[clinic start generated code]*/
669
670static PyObject *
671_elementtree_Element_clear_impl(ElementObject *self)
672/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
673{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300674 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->text));
678 self->text = Py_None;
679
680 Py_INCREF(Py_None);
681 Py_DECREF(JOIN_OBJ(self->tail));
682 self->tail = Py_None;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.__copy__
689
690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element___copy___impl(ElementObject *self)
694/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200696 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 ElementObject* element;
698
Eli Bendersky092af1f2012-03-04 07:14:03 +0200699 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800700 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 if (!element)
702 return NULL;
703
704 Py_DECREF(JOIN_OBJ(element->text));
705 element->text = self->text;
706 Py_INCREF(JOIN_OBJ(element->text));
707
708 Py_DECREF(JOIN_OBJ(element->tail));
709 element->tail = self->tail;
710 Py_INCREF(JOIN_OBJ(element->tail));
711
712 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000713 if (element_resize(element, self->extra->length) < 0) {
714 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000716 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717
718 for (i = 0; i < self->extra->length; i++) {
719 Py_INCREF(self->extra->children[i]);
720 element->extra->children[i] = self->extra->children[i];
721 }
722
723 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724 }
725
726 return (PyObject*) element;
727}
728
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200729/* Helper for a deep copy. */
730LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
731
Serhiy Storchakacb985562015-05-04 15:32:48 +0300732/*[clinic input]
733_elementtree.Element.__deepcopy__
734
735 memo: object
736 /
737
738[clinic start generated code]*/
739
740static PyObject *
741_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
742/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200744 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745 ElementObject* element;
746 PyObject* tag;
747 PyObject* attrib;
748 PyObject* text;
749 PyObject* tail;
750 PyObject* id;
751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752 tag = deepcopy(self->tag, memo);
753 if (!tag)
754 return NULL;
755
756 if (self->extra) {
757 attrib = deepcopy(self->extra->attrib, memo);
758 if (!attrib) {
759 Py_DECREF(tag);
760 return NULL;
761 }
762 } else {
763 Py_INCREF(Py_None);
764 attrib = Py_None;
765 }
766
Eli Bendersky092af1f2012-03-04 07:14:03 +0200767 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000768
769 Py_DECREF(tag);
770 Py_DECREF(attrib);
771
772 if (!element)
773 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100774
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000775 text = deepcopy(JOIN_OBJ(self->text), memo);
776 if (!text)
777 goto error;
778 Py_DECREF(element->text);
779 element->text = JOIN_SET(text, JOIN_GET(self->text));
780
781 tail = deepcopy(JOIN_OBJ(self->tail), memo);
782 if (!tail)
783 goto error;
784 Py_DECREF(element->tail);
785 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
786
787 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000788 if (element_resize(element, self->extra->length) < 0)
789 goto error;
790
791 for (i = 0; i < self->extra->length; i++) {
792 PyObject* child = deepcopy(self->extra->children[i], memo);
793 if (!child) {
794 element->extra->length = i;
795 goto error;
796 }
797 element->extra->children[i] = child;
798 }
799
800 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 }
802
803 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700804 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000805 if (!id)
806 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 i = PyDict_SetItem(memo, id, (PyObject*) element);
809
810 Py_DECREF(id);
811
812 if (i < 0)
813 goto error;
814
815 return (PyObject*) element;
816
817 error:
818 Py_DECREF(element);
819 return NULL;
820}
821
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200822LOCAL(PyObject *)
823deepcopy(PyObject *object, PyObject *memo)
824{
825 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200826 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200827 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200828
829 /* Fast paths */
830 if (object == Py_None || PyUnicode_CheckExact(object)) {
831 Py_INCREF(object);
832 return object;
833 }
834
835 if (Py_REFCNT(object) == 1) {
836 if (PyDict_CheckExact(object)) {
837 PyObject *key, *value;
838 Py_ssize_t pos = 0;
839 int simple = 1;
840 while (PyDict_Next(object, &pos, &key, &value)) {
841 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
842 simple = 0;
843 break;
844 }
845 }
846 if (simple)
847 return PyDict_Copy(object);
848 /* Fall through to general case */
849 }
850 else if (Element_CheckExact(object)) {
851 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
852 }
853 }
854
855 /* General case */
856 st = ET_STATE_GLOBAL;
857 if (!st->deepcopy_obj) {
858 PyErr_SetString(PyExc_RuntimeError,
859 "deepcopy helper not found");
860 return NULL;
861 }
862
Victor Stinner7fbac452016-08-20 01:34:44 +0200863 stack[0] = object;
864 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200865 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200866}
867
868
Serhiy Storchakacb985562015-05-04 15:32:48 +0300869/*[clinic input]
870_elementtree.Element.__sizeof__ -> Py_ssize_t
871
872[clinic start generated code]*/
873
874static Py_ssize_t
875_elementtree_Element___sizeof___impl(ElementObject *self)
876/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200878 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879 if (self->extra) {
880 result += sizeof(ElementObjectExtra);
881 if (self->extra->children != self->extra->_children)
882 result += sizeof(PyObject*) * self->extra->allocated;
883 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200885}
886
/* Keys of the state dictionary built by __getstate__ and read back by
 * __setstate__.  element_setstate_from_Python() also uses them as the
 * keyword names when unpacking that dictionary.
 */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
893
894/* __getstate__ returns a fabricated instance dict as in the pure-Python
895 * Element implementation, for interoperability/interchangeability. This
896 * makes the pure-Python implementation details an API, but (a) there aren't
897 * any unnecessary structures there; and (b) it buys compatibility with 3.2
898 * pickles. See issue #16076.
899 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300900/*[clinic input]
901_elementtree.Element.__getstate__
902
903[clinic start generated code]*/
904
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906_elementtree_Element___getstate___impl(ElementObject *self)
907/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200909 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910 PyObject *instancedict = NULL, *children;
911
912 /* Build a list of children. */
913 children = PyList_New(self->extra ? self->extra->length : 0);
914 if (!children)
915 return NULL;
916 for (i = 0; i < PyList_GET_SIZE(children); i++) {
917 PyObject *child = self->extra->children[i];
918 Py_INCREF(child);
919 PyList_SET_ITEM(children, i, child);
920 }
921
922 /* Construct the state object. */
923 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
924 if (noattrib)
925 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
926 PICKLED_TAG, self->tag,
927 PICKLED_CHILDREN, children,
928 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700929 PICKLED_TEXT, JOIN_OBJ(self->text),
930 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931 else
932 instancedict = Py_BuildValue("{sOsOsOsOsO}",
933 PICKLED_TAG, self->tag,
934 PICKLED_CHILDREN, children,
935 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700936 PICKLED_TEXT, JOIN_OBJ(self->text),
937 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800938 if (instancedict) {
939 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800941 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 else {
943 for (i = 0; i < PyList_GET_SIZE(children); i++)
944 Py_DECREF(PyList_GET_ITEM(children, i));
945 Py_DECREF(children);
946
947 return NULL;
948 }
949}
950
951static PyObject *
952element_setstate_from_attributes(ElementObject *self,
953 PyObject *tag,
954 PyObject *attrib,
955 PyObject *text,
956 PyObject *tail,
957 PyObject *children)
958{
959 Py_ssize_t i, nchildren;
960
961 if (!tag) {
962 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
963 return NULL;
964 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200966 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300967 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->text);
970 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 _clear_joined_ptr(&self->tail);
974 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
975 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 /* Handle ATTRIB and CHILDREN. */
978 if (!children && !attrib)
979 Py_RETURN_NONE;
980
981 /* Compute 'nchildren'. */
982 if (children) {
983 if (!PyList_Check(children)) {
984 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
985 return NULL;
986 }
987 nchildren = PyList_Size(children);
988 }
989 else {
990 nchildren = 0;
991 }
992
993 /* Allocate 'extra'. */
994 if (element_resize(self, nchildren)) {
995 return NULL;
996 }
997 assert(self->extra && self->extra->allocated >= nchildren);
998
999 /* Copy children */
1000 for (i = 0; i < nchildren; i++) {
1001 self->extra->children[i] = PyList_GET_ITEM(children, i);
1002 Py_INCREF(self->extra->children[i]);
1003 }
1004
1005 self->extra->length = nchildren;
1006 self->extra->allocated = nchildren;
1007
1008 /* Stash attrib. */
1009 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001011 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 }
1013
1014 Py_RETURN_NONE;
1015}
1016
1017/* __setstate__ for Element instance from the Python implementation.
1018 * 'state' should be the instance dict.
1019 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021static PyObject *
1022element_setstate_from_Python(ElementObject *self, PyObject *state)
1023{
1024 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1025 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1026 PyObject *args;
1027 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030 tag = attrib = text = tail = children = NULL;
1031 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034
1035 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1036 &attrib, &text, &tail, &children))
1037 retval = element_setstate_from_attributes(self, tag, attrib, text,
1038 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001039 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001040 retval = NULL;
1041
1042 Py_DECREF(args);
1043 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044}
1045
Serhiy Storchakacb985562015-05-04 15:32:48 +03001046/*[clinic input]
1047_elementtree.Element.__setstate__
1048
1049 state: object
1050 /
1051
1052[clinic start generated code]*/
1053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1056/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057{
1058 if (!PyDict_CheckExact(state)) {
1059 PyErr_Format(PyExc_TypeError,
1060 "Don't know how to unpickle \"%.200R\" as an Element",
1061 state);
1062 return NULL;
1063 }
1064 else
1065 return element_setstate_from_Python(self, state);
1066}
1067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068LOCAL(int)
1069checkpath(PyObject* tag)
1070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 Py_ssize_t i;
1072 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 /* check if a tag contains an xpath character */
1075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076#define PATHCHAR(ch) \
1077 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1081 void *data = PyUnicode_DATA(tag);
1082 unsigned int kind = PyUnicode_KIND(tag);
1083 for (i = 0; i < len; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 return 1;
1091 }
1092 return 0;
1093 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001094 if (PyBytes_Check(tag)) {
1095 char *p = PyBytes_AS_STRING(tag);
1096 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (p[i] == '{')
1098 check = 0;
1099 else if (p[i] == '}')
1100 check = 1;
1101 else if (check && PATHCHAR(p[i]))
1102 return 1;
1103 }
1104 return 0;
1105 }
1106
1107 return 1; /* unknown type; might be path expression */
1108}
1109
Serhiy Storchakacb985562015-05-04 15:32:48 +03001110/*[clinic input]
1111_elementtree.Element.extend
1112
1113 elements: object
1114 /
1115
1116[clinic start generated code]*/
1117
1118static PyObject *
1119_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1120/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121{
1122 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 if (!seq) {
1127 PyErr_Format(
1128 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 );
1131 return NULL;
1132 }
1133
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 Py_INCREF(element);
1137 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001138 PyErr_Format(
1139 PyExc_TypeError,
1140 "expected an Element, not \"%.200s\"",
1141 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001142 Py_DECREF(seq);
1143 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001144 return NULL;
1145 }
1146
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147 if (element_add_subelement(self, element) < 0) {
1148 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001149 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 return NULL;
1151 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001152 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001153 }
1154
1155 Py_DECREF(seq);
1156
1157 Py_RETURN_NONE;
1158}
1159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160/*[clinic input]
1161_elementtree.Element.find
1162
1163 path: object
1164 namespaces: object = None
1165
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1170 PyObject *namespaces)
1171/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001173 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001175
Serhiy Storchakacb985562015-05-04 15:32:48 +03001176 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001178 return _PyObject_CallMethodIdObjArgs(
1179 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001181 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182
1183 if (!self->extra)
1184 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001188 int rc;
1189 if (!Element_CheckExact(item))
1190 continue;
1191 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001192 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_DECREF(item);
1196 if (rc < 0)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 }
1199
1200 Py_RETURN_NONE;
1201}
1202
Serhiy Storchakacb985562015-05-04 15:32:48 +03001203/*[clinic input]
1204_elementtree.Element.findtext
1205
1206 path: object
1207 default: object = None
1208 namespaces: object = None
1209
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1214 PyObject *default_value,
1215 PyObject *namespaces)
1216/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001218 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001219 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001220 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001223 return _PyObject_CallMethodIdObjArgs(
1224 st->elementpath_obj, &PyId_findtext,
1225 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 );
1227
1228 if (!self->extra) {
1229 Py_INCREF(default_value);
1230 return default_value;
1231 }
1232
1233 for (i = 0; i < self->extra->length; i++) {
1234 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001235 int rc;
1236 if (!Element_CheckExact(item))
1237 continue;
1238 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001239 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001241 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 if (text == Py_None) {
1243 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001244 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001246 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248 return text;
1249 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001250 Py_DECREF(item);
1251 if (rc < 0)
1252 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253 }
1254
1255 Py_INCREF(default_value);
1256 return default_value;
1257}
1258
Serhiy Storchakacb985562015-05-04 15:32:48 +03001259/*[clinic input]
1260_elementtree.Element.findall
1261
1262 path: object
1263 namespaces: object = None
1264
1265[clinic start generated code]*/
1266
1267static PyObject *
1268_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1269 PyObject *namespaces)
1270/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001272 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001274 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001275 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001276
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001278 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001279 return _PyObject_CallMethodIdObjArgs(
1280 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001282 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001283
1284 out = PyList_New(0);
1285 if (!out)
1286 return NULL;
1287
1288 if (!self->extra)
1289 return out;
1290
1291 for (i = 0; i < self->extra->length; i++) {
1292 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001293 int rc;
1294 if (!Element_CheckExact(item))
1295 continue;
1296 Py_INCREF(item);
1297 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1298 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1299 Py_DECREF(item);
1300 Py_DECREF(out);
1301 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001303 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304 }
1305
1306 return out;
1307}
1308
Serhiy Storchakacb985562015-05-04 15:32:48 +03001309/*[clinic input]
1310_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001311
Serhiy Storchakacb985562015-05-04 15:32:48 +03001312 path: object
1313 namespaces: object = None
1314
1315[clinic start generated code]*/
1316
1317static PyObject *
1318_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1319 PyObject *namespaces)
1320/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1321{
1322 PyObject* tag = path;
1323 _Py_IDENTIFIER(iterfind);
1324 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001325
Victor Stinnerf5616342016-12-09 15:26:00 +01001326 return _PyObject_CallMethodIdObjArgs(
1327 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328}
1329
Serhiy Storchakacb985562015-05-04 15:32:48 +03001330/*[clinic input]
1331_elementtree.Element.get
1332
1333 key: object
1334 default: object = None
1335
1336[clinic start generated code]*/
1337
1338static PyObject *
1339_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1340 PyObject *default_value)
1341/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001342{
1343 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344
1345 if (!self->extra || self->extra->attrib == Py_None)
1346 value = default_value;
1347 else {
1348 value = PyDict_GetItem(self->extra->attrib, key);
1349 if (!value)
1350 value = default_value;
1351 }
1352
1353 Py_INCREF(value);
1354 return value;
1355}
1356
Serhiy Storchakacb985562015-05-04 15:32:48 +03001357/*[clinic input]
1358_elementtree.Element.getchildren
1359
1360[clinic start generated code]*/
1361
1362static PyObject *
1363_elementtree_Element_getchildren_impl(ElementObject *self)
1364/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001366 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 PyObject* list;
1368
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001369 if (PyErr_WarnEx(PyExc_DeprecationWarning,
1370 "This method will be removed in future versions. "
1371 "Use 'list(elem)' or iteration over elem instead.",
1372 1) < 0) {
1373 return NULL;
1374 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001375
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001376 if (!self->extra)
1377 return PyList_New(0);
1378
1379 list = PyList_New(self->extra->length);
1380 if (!list)
1381 return NULL;
1382
1383 for (i = 0; i < self->extra->length; i++) {
1384 PyObject* item = self->extra->children[i];
1385 Py_INCREF(item);
1386 PyList_SET_ITEM(list, i, item);
1387 }
1388
1389 return list;
1390}
1391
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001392
/* Forward declaration; the definition is not in this part of the file.
   Below it is called with gettext=0 by Element.iter() and with gettext=1
   (and tag=None) by Element.itertext(). */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1395
1396
Serhiy Storchakacb985562015-05-04 15:32:48 +03001397/*[clinic input]
1398_elementtree.Element.iter
1399
1400 tag: object = None
1401
1402[clinic start generated code]*/
1403
Eli Bendersky64d11e62012-06-15 07:42:50 +03001404static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001405_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1406/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001407{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001408 if (PyUnicode_Check(tag)) {
1409 if (PyUnicode_READY(tag) < 0)
1410 return NULL;
1411 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1412 tag = Py_None;
1413 }
1414 else if (PyBytes_Check(tag)) {
1415 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1416 tag = Py_None;
1417 }
1418
Eli Bendersky64d11e62012-06-15 07:42:50 +03001419 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001420}
1421
1422
Serhiy Storchakacb985562015-05-04 15:32:48 +03001423/*[clinic input]
Serhiy Storchaka762ec972017-03-30 18:12:06 +03001424_elementtree.Element.getiterator
1425
1426 tag: object = None
1427
1428[clinic start generated code]*/
1429
1430static PyObject *
1431_elementtree_Element_getiterator_impl(ElementObject *self, PyObject *tag)
1432/*[clinic end generated code: output=cb69ff4a3742dfa1 input=500da1a03f7b9e28]*/
1433{
1434 /* Change for a DeprecationWarning in 1.4 */
1435 if (PyErr_WarnEx(PyExc_PendingDeprecationWarning,
1436 "This method will be removed in future versions. "
1437 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
1438 1) < 0) {
1439 return NULL;
1440 }
1441 return _elementtree_Element_iter_impl(self, tag);
1442}
1443
1444
1445/*[clinic input]
Serhiy Storchakacb985562015-05-04 15:32:48 +03001446_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001447
Serhiy Storchakacb985562015-05-04 15:32:48 +03001448[clinic start generated code]*/
1449
1450static PyObject *
1451_elementtree_Element_itertext_impl(ElementObject *self)
1452/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1453{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001454 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455}
1456
Eli Bendersky64d11e62012-06-15 07:42:50 +03001457
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001459element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001461 ElementObject* self = (ElementObject*) self_;
1462
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001463 if (!self->extra || index < 0 || index >= self->extra->length) {
1464 PyErr_SetString(
1465 PyExc_IndexError,
1466 "child index out of range"
1467 );
1468 return NULL;
1469 }
1470
1471 Py_INCREF(self->extra->children[index]);
1472 return self->extra->children[index];
1473}
1474
Serhiy Storchakacb985562015-05-04 15:32:48 +03001475/*[clinic input]
1476_elementtree.Element.insert
1477
1478 index: Py_ssize_t
1479 subelement: object(subclass_of='&Element_Type')
1480 /
1481
1482[clinic start generated code]*/
1483
1484static PyObject *
1485_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1486 PyObject *subelement)
1487/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001489 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001490
Victor Stinner5f0af232013-07-11 23:01:36 +02001491 if (!self->extra) {
1492 if (create_extra(self, NULL) < 0)
1493 return NULL;
1494 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001496 if (index < 0) {
1497 index += self->extra->length;
1498 if (index < 0)
1499 index = 0;
1500 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001501 if (index > self->extra->length)
1502 index = self->extra->length;
1503
1504 if (element_resize(self, 1) < 0)
1505 return NULL;
1506
1507 for (i = self->extra->length; i > index; i--)
1508 self->extra->children[i] = self->extra->children[i-1];
1509
Serhiy Storchakacb985562015-05-04 15:32:48 +03001510 Py_INCREF(subelement);
1511 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001512
1513 self->extra->length++;
1514
1515 Py_RETURN_NONE;
1516}
1517
Serhiy Storchakacb985562015-05-04 15:32:48 +03001518/*[clinic input]
1519_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001520
Serhiy Storchakacb985562015-05-04 15:32:48 +03001521[clinic start generated code]*/
1522
1523static PyObject *
1524_elementtree_Element_items_impl(ElementObject *self)
1525/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1526{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001527 if (!self->extra || self->extra->attrib == Py_None)
1528 return PyList_New(0);
1529
1530 return PyDict_Items(self->extra->attrib);
1531}
1532
Serhiy Storchakacb985562015-05-04 15:32:48 +03001533/*[clinic input]
1534_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535
Serhiy Storchakacb985562015-05-04 15:32:48 +03001536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_keys_impl(ElementObject *self)
1540/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1541{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542 if (!self->extra || self->extra->attrib == Py_None)
1543 return PyList_New(0);
1544
1545 return PyDict_Keys(self->extra->attrib);
1546}
1547
Martin v. Löwis18e16552006-02-15 17:27:45 +00001548static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549element_length(ElementObject* self)
1550{
1551 if (!self->extra)
1552 return 0;
1553
1554 return self->extra->length;
1555}
1556
Serhiy Storchakacb985562015-05-04 15:32:48 +03001557/*[clinic input]
1558_elementtree.Element.makeelement
1559
1560 tag: object
1561 attrib: object
1562 /
1563
1564[clinic start generated code]*/
1565
1566static PyObject *
1567_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1568 PyObject *attrib)
1569/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570{
1571 PyObject* elem;
1572
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573 attrib = PyDict_Copy(attrib);
1574 if (!attrib)
1575 return NULL;
1576
Eli Bendersky092af1f2012-03-04 07:14:03 +02001577 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001578
1579 Py_DECREF(attrib);
1580
1581 return elem;
1582}
1583
Serhiy Storchakacb985562015-05-04 15:32:48 +03001584/*[clinic input]
1585_elementtree.Element.remove
1586
1587 subelement: object(subclass_of='&Element_Type')
1588 /
1589
1590[clinic start generated code]*/
1591
1592static PyObject *
1593_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1594/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001596 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001597 int rc;
1598 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600 if (!self->extra) {
1601 /* element has no children, so raise exception */
1602 PyErr_SetString(
1603 PyExc_ValueError,
1604 "list.remove(x): x not in list"
1605 );
1606 return NULL;
1607 }
1608
1609 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001610 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001612 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001613 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001614 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001615 if (rc < 0)
1616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617 }
1618
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001619 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001620 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001621 PyErr_SetString(
1622 PyExc_ValueError,
1623 "list.remove(x): x not in list"
1624 );
1625 return NULL;
1626 }
1627
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001628 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629
1630 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631 for (; i < self->extra->length; i++)
1632 self->extra->children[i] = self->extra->children[i+1];
1633
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001634 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001635 Py_RETURN_NONE;
1636}
1637
1638static PyObject*
1639element_repr(ElementObject* self)
1640{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001641 int status;
1642
1643 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001644 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001645
1646 status = Py_ReprEnter((PyObject *)self);
1647 if (status == 0) {
1648 PyObject *res;
1649 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1650 Py_ReprLeave((PyObject *)self);
1651 return res;
1652 }
1653 if (status > 0)
1654 PyErr_Format(PyExc_RuntimeError,
1655 "reentrant call inside %s.__repr__",
1656 Py_TYPE(self)->tp_name);
1657 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001658}
1659
Serhiy Storchakacb985562015-05-04 15:32:48 +03001660/*[clinic input]
1661_elementtree.Element.set
1662
1663 key: object
1664 value: object
1665 /
1666
1667[clinic start generated code]*/
1668
1669static PyObject *
1670_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1671 PyObject *value)
1672/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001673{
1674 PyObject* attrib;
1675
Victor Stinner5f0af232013-07-11 23:01:36 +02001676 if (!self->extra) {
1677 if (create_extra(self, NULL) < 0)
1678 return NULL;
1679 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001680
1681 attrib = element_get_attrib(self);
1682 if (!attrib)
1683 return NULL;
1684
1685 if (PyDict_SetItem(attrib, key, value) < 0)
1686 return NULL;
1687
1688 Py_RETURN_NONE;
1689}
1690
1691static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001692element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001693{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001694 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001695 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001696 PyObject* old;
1697
1698 if (!self->extra || index < 0 || index >= self->extra->length) {
1699 PyErr_SetString(
1700 PyExc_IndexError,
1701 "child assignment index out of range");
1702 return -1;
1703 }
1704
1705 old = self->extra->children[index];
1706
1707 if (item) {
1708 Py_INCREF(item);
1709 self->extra->children[index] = item;
1710 } else {
1711 self->extra->length--;
1712 for (i = index; i < self->extra->length; i++)
1713 self->extra->children[i] = self->extra->children[i+1];
1714 }
1715
1716 Py_DECREF(old);
1717
1718 return 0;
1719}
1720
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001721static PyObject*
1722element_subscr(PyObject* self_, PyObject* item)
1723{
1724 ElementObject* self = (ElementObject*) self_;
1725
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726 if (PyIndex_Check(item)) {
1727 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001728
1729 if (i == -1 && PyErr_Occurred()) {
1730 return NULL;
1731 }
1732 if (i < 0 && self->extra)
1733 i += self->extra->length;
1734 return element_getitem(self_, i);
1735 }
1736 else if (PySlice_Check(item)) {
1737 Py_ssize_t start, stop, step, slicelen, cur, i;
1738 PyObject* list;
1739
1740 if (!self->extra)
1741 return PyList_New(0);
1742
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001743 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001744 return NULL;
1745 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001746 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1747 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748
1749 if (slicelen <= 0)
1750 return PyList_New(0);
1751 else {
1752 list = PyList_New(slicelen);
1753 if (!list)
1754 return NULL;
1755
1756 for (cur = start, i = 0; i < slicelen;
1757 cur += step, i++) {
1758 PyObject* item = self->extra->children[cur];
1759 Py_INCREF(item);
1760 PyList_SET_ITEM(list, i, item);
1761 }
1762
1763 return list;
1764 }
1765 }
1766 else {
1767 PyErr_SetString(PyExc_TypeError,
1768 "element indices must be integers");
1769 return NULL;
1770 }
1771}
1772
1773static int
1774element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1775{
1776 ElementObject* self = (ElementObject*) self_;
1777
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001778 if (PyIndex_Check(item)) {
1779 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001780
1781 if (i == -1 && PyErr_Occurred()) {
1782 return -1;
1783 }
1784 if (i < 0 && self->extra)
1785 i += self->extra->length;
1786 return element_setitem(self_, i, value);
1787 }
1788 else if (PySlice_Check(item)) {
1789 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1790
1791 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001792 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001793
Victor Stinner5f0af232013-07-11 23:01:36 +02001794 if (!self->extra) {
1795 if (create_extra(self, NULL) < 0)
1796 return -1;
1797 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001798
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001799 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001800 return -1;
1801 }
Serhiy Storchakab879fe82017-04-08 09:53:51 +03001802 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1803 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001804
Eli Bendersky865756a2012-03-09 13:38:15 +02001805 if (value == NULL) {
1806 /* Delete slice */
1807 size_t cur;
1808 Py_ssize_t i;
1809
1810 if (slicelen <= 0)
1811 return 0;
1812
1813 /* Since we're deleting, the direction of the range doesn't matter,
1814 * so for simplicity make it always ascending.
1815 */
1816 if (step < 0) {
1817 stop = start + 1;
1818 start = stop + step * (slicelen - 1) - 1;
1819 step = -step;
1820 }
1821
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001822 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001823
1824 /* recycle is a list that will contain all the children
1825 * scheduled for removal.
1826 */
1827 if (!(recycle = PyList_New(slicelen))) {
1828 PyErr_NoMemory();
1829 return -1;
1830 }
1831
1832 /* This loop walks over all the children that have to be deleted,
1833 * with cur pointing at them. num_moved is the amount of children
1834 * until the next deleted child that have to be "shifted down" to
1835 * occupy the deleted's places.
1836 * Note that in the ith iteration, shifting is done i+i places down
1837 * because i children were already removed.
1838 */
1839 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1840 /* Compute how many children have to be moved, clipping at the
1841 * list end.
1842 */
1843 Py_ssize_t num_moved = step - 1;
1844 if (cur + step >= (size_t)self->extra->length) {
1845 num_moved = self->extra->length - cur - 1;
1846 }
1847
1848 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1849
1850 memmove(
1851 self->extra->children + cur - i,
1852 self->extra->children + cur + 1,
1853 num_moved * sizeof(PyObject *));
1854 }
1855
1856 /* Leftover "tail" after the last removed child */
1857 cur = start + (size_t)slicelen * step;
1858 if (cur < (size_t)self->extra->length) {
1859 memmove(
1860 self->extra->children + cur - slicelen,
1861 self->extra->children + cur,
1862 (self->extra->length - cur) * sizeof(PyObject *));
1863 }
1864
1865 self->extra->length -= slicelen;
1866
1867 /* Discard the recycle list with all the deleted sub-elements */
1868 Py_XDECREF(recycle);
1869 return 0;
1870 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001871
1872 /* A new slice is actually being assigned */
1873 seq = PySequence_Fast(value, "");
1874 if (!seq) {
1875 PyErr_Format(
1876 PyExc_TypeError,
1877 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1878 );
1879 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001880 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001881 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001882
1883 if (step != 1 && newlen != slicelen)
1884 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001885 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001886 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 "attempt to assign sequence of size %zd "
1888 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001889 newlen, slicelen
1890 );
1891 return -1;
1892 }
1893
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001894 /* Resize before creating the recycle bin, to prevent refleaks. */
1895 if (newlen > slicelen) {
1896 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001897 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001898 return -1;
1899 }
1900 }
1901
1902 if (slicelen > 0) {
1903 /* to avoid recursive calls to this method (via decref), move
1904 old items to the recycle bin here, and get rid of them when
1905 we're done modifying the element */
1906 recycle = PyList_New(slicelen);
1907 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001908 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001909 return -1;
1910 }
1911 for (cur = start, i = 0; i < slicelen;
1912 cur += step, i++)
1913 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1914 }
1915
1916 if (newlen < slicelen) {
1917 /* delete slice */
1918 for (i = stop; i < self->extra->length; i++)
1919 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1920 } else if (newlen > slicelen) {
1921 /* insert slice */
1922 for (i = self->extra->length-1; i >= stop; i--)
1923 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1924 }
1925
1926 /* replace the slice */
1927 for (cur = start, i = 0; i < newlen;
1928 cur += step, i++) {
1929 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1930 Py_INCREF(element);
1931 self->extra->children[cur] = element;
1932 }
1933
1934 self->extra->length += newlen - slicelen;
1935
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001936 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001937
1938 /* discard the recycle bin, and everything in it */
1939 Py_XDECREF(recycle);
1940
1941 return 0;
1942 }
1943 else {
1944 PyErr_SetString(PyExc_TypeError,
1945 "element indices must be integers");
1946 return -1;
1947 }
1948}
1949
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001950static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001951element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001952{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001953 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001954 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001955 return res;
1956}
1957
Serhiy Storchakadde08152015-11-25 15:28:13 +02001958static PyObject*
1959element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001960{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001961 PyObject *res = element_get_text(self);
1962 Py_XINCREF(res);
1963 return res;
1964}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001965
Serhiy Storchakadde08152015-11-25 15:28:13 +02001966static PyObject*
1967element_tail_getter(ElementObject *self, void *closure)
1968{
1969 PyObject *res = element_get_tail(self);
1970 Py_XINCREF(res);
1971 return res;
1972}
1973
1974static PyObject*
1975element_attrib_getter(ElementObject *self, void *closure)
1976{
1977 PyObject *res;
1978 if (!self->extra) {
1979 if (create_extra(self, NULL) < 0)
1980 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001981 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001982 res = element_get_attrib(self);
1983 Py_XINCREF(res);
1984 return res;
1985}
Victor Stinner4d463432013-07-11 23:05:03 +02001986
/* Macro for setter validation: a NULL value means "delete the attribute",
 * which is rejected with AttributeError and makes the *enclosing function*
 * return -1.  Wrapped in do { } while (0) so the expansion is a single
 * statement and cannot capture a following `else`; callers must terminate
 * the invocation with a semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1995
Serhiy Storchakadde08152015-11-25 15:28:13 +02001996static int
1997element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1998{
1999 _VALIDATE_ATTR_VALUE(value);
2000 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002001 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02002002 return 0;
2003}
2004
2005static int
2006element_text_setter(ElementObject *self, PyObject *value, void *closure)
2007{
2008 _VALIDATE_ATTR_VALUE(value);
2009 Py_INCREF(value);
2010 Py_DECREF(JOIN_OBJ(self->text));
2011 self->text = value;
2012 return 0;
2013}
2014
2015static int
2016element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2017{
2018 _VALIDATE_ATTR_VALUE(value);
2019 Py_INCREF(value);
2020 Py_DECREF(JOIN_OBJ(self->tail));
2021 self->tail = value;
2022 return 0;
2023}
2024
2025static int
2026element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2027{
2028 _VALIDATE_ATTR_VALUE(value);
2029 if (!self->extra) {
2030 if (create_extra(self, NULL) < 0)
2031 return -1;
2032 }
2033 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002034 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002035 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002036}
2037
2038static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002039 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002040 0, /* sq_concat */
2041 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002042 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002043 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002044 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002045 0,
2046};
2047
Eli Bendersky64d11e62012-06-15 07:42:50 +03002048/******************************* Element iterator ****************************/
2049
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked;
                                   the stack holds a reference to it */
    Py_ssize_t child_index;     /* index of the next child of `parent` to visit */
} ParentLocator;
2062
/* Iterator object shared by the element iteration entry points; see
 * elementiter_next() for how each field drives the traversal. */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* PyMem-allocated array of stack entries */
    Py_ssize_t parent_stack_used;   /* number of live entries in parent_stack */
    Py_ssize_t parent_stack_size;   /* allocated capacity of parent_stack */
    ElementObject *root_element;    /* element to start from; set to NULL once
                                       it has been handed to the traversal */
    PyObject *sought_tag;           /* tag to compare against; Py_None accepts
                                       every element */
    int gettext;                    /* non-zero: yield text/tail objects
                                       instead of elements */
} ElementIterObject;
2072
2073
2074static void
2075elementiter_dealloc(ElementIterObject *it)
2076{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002077 Py_ssize_t i = it->parent_stack_used;
2078 it->parent_stack_used = 0;
2079 while (i--)
2080 Py_XDECREF(it->parent_stack[i].parent);
2081 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082
2083 Py_XDECREF(it->sought_tag);
2084 Py_XDECREF(it->root_element);
2085
2086 PyObject_GC_UnTrack(it);
2087 PyObject_GC_Del(it);
2088}
2089
2090static int
2091elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2092{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002093 Py_ssize_t i = it->parent_stack_used;
2094 while (i--)
2095 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002096
2097 Py_VISIT(it->root_element);
2098 Py_VISIT(it->sought_tag);
2099 return 0;
2100}
2101
2102/* Helper function for elementiter_next. Add a new parent to the parent stack.
2103 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002104static int
2105parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002106{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002107 ParentLocator *item;
2108
2109 if (it->parent_stack_used >= it->parent_stack_size) {
2110 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2111 ParentLocator *parent_stack = it->parent_stack;
2112 PyMem_Resize(parent_stack, ParentLocator, new_size);
2113 if (parent_stack == NULL)
2114 return -1;
2115 it->parent_stack = parent_stack;
2116 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002117 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002118 item = it->parent_stack + it->parent_stack_used++;
2119 Py_INCREF(parent);
2120 item->parent = parent;
2121 item->child_index = 0;
2122 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002123}
2124
2125static PyObject *
2126elementiter_next(ElementIterObject *it)
2127{
2128 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002129 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002130 * A short note on gettext: this function serves both the iter() and
2131 * itertext() methods to avoid code duplication. However, there are a few
2132 * small differences in the way these iterations work. Namely:
2133 * - itertext() only yields text from nodes that have it, and continues
2134 * iterating when a node doesn't have text (so it doesn't return any
2135 * node like iter())
2136 * - itertext() also has to handle tail, after finishing with all the
2137 * children of a node.
2138 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002139 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002140 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002141 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002142
2143 while (1) {
2144 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002145 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002146 * iterator is exhausted.
2147 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002148 if (!it->parent_stack_used) {
2149 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002150 PyErr_SetNone(PyExc_StopIteration);
2151 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152 }
2153
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002154 elem = it->root_element; /* steals a reference */
2155 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002156 }
2157 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002158 /* See if there are children left to traverse in the current parent. If
2159 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002161 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2162 Py_ssize_t child_index = item->child_index;
2163 ElementObjectExtra *extra;
2164 elem = item->parent;
2165 extra = elem->extra;
2166 if (!extra || child_index >= extra->length) {
2167 it->parent_stack_used--;
2168 /* Note that extra condition on it->parent_stack_used here;
2169 * this is because itertext() is supposed to only return *inner*
2170 * text, not text following the element it began iteration with.
2171 */
2172 if (it->gettext && it->parent_stack_used) {
2173 text = element_get_tail(elem);
2174 goto gettext;
2175 }
2176 Py_DECREF(elem);
2177 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002178 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002179
Serhiy Storchaka576def02017-03-30 09:47:31 +03002180 if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) {
2181 PyErr_Format(PyExc_AttributeError,
2182 "'%.100s' object has no attribute 'iter'",
2183 Py_TYPE(extra->children[child_index])->tp_name);
2184 return NULL;
2185 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002186 elem = (ElementObject *)extra->children[child_index];
2187 item->child_index++;
2188 Py_INCREF(elem);
2189 }
2190
2191 if (parent_stack_push_new(it, elem) < 0) {
2192 Py_DECREF(elem);
2193 PyErr_NoMemory();
2194 return NULL;
2195 }
2196 if (it->gettext) {
2197 text = element_get_text(elem);
2198 goto gettext;
2199 }
2200
2201 if (it->sought_tag == Py_None)
2202 return (PyObject *)elem;
2203
2204 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2205 if (rc > 0)
2206 return (PyObject *)elem;
2207
2208 Py_DECREF(elem);
2209 if (rc < 0)
2210 return NULL;
2211 continue;
2212
2213gettext:
2214 if (!text) {
2215 Py_DECREF(elem);
2216 return NULL;
2217 }
2218 if (text == Py_None) {
2219 Py_DECREF(elem);
2220 }
2221 else {
2222 Py_INCREF(text);
2223 Py_DECREF(elem);
2224 rc = PyObject_IsTrue(text);
2225 if (rc > 0)
2226 return text;
2227 Py_DECREF(text);
2228 if (rc < 0)
2229 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002230 }
2231 }
2232
2233 return NULL;
2234}
2235
2236
2237static PyTypeObject ElementIter_Type = {
2238 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002239 /* Using the module's name since the pure-Python implementation does not
2240 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002241 "_elementtree._element_iterator", /* tp_name */
2242 sizeof(ElementIterObject), /* tp_basicsize */
2243 0, /* tp_itemsize */
2244 /* methods */
2245 (destructor)elementiter_dealloc, /* tp_dealloc */
2246 0, /* tp_print */
2247 0, /* tp_getattr */
2248 0, /* tp_setattr */
2249 0, /* tp_reserved */
2250 0, /* tp_repr */
2251 0, /* tp_as_number */
2252 0, /* tp_as_sequence */
2253 0, /* tp_as_mapping */
2254 0, /* tp_hash */
2255 0, /* tp_call */
2256 0, /* tp_str */
2257 0, /* tp_getattro */
2258 0, /* tp_setattro */
2259 0, /* tp_as_buffer */
2260 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2261 0, /* tp_doc */
2262 (traverseproc)elementiter_traverse, /* tp_traverse */
2263 0, /* tp_clear */
2264 0, /* tp_richcompare */
2265 0, /* tp_weaklistoffset */
2266 PyObject_SelfIter, /* tp_iter */
2267 (iternextfunc)elementiter_next, /* tp_iternext */
2268 0, /* tp_methods */
2269 0, /* tp_members */
2270 0, /* tp_getset */
2271 0, /* tp_base */
2272 0, /* tp_dict */
2273 0, /* tp_descr_get */
2274 0, /* tp_descr_set */
2275 0, /* tp_dictoffset */
2276 0, /* tp_init */
2277 0, /* tp_alloc */
2278 0, /* tp_new */
2279};
2280
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002281#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002282
2283static PyObject *
2284create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2285{
2286 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002287
2288 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2289 if (!it)
2290 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002291
Victor Stinner4d463432013-07-11 23:05:03 +02002292 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002293 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002295 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002296 it->root_element = self;
2297
Eli Bendersky64d11e62012-06-15 07:42:50 +03002298 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002299
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002300 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002301 if (it->parent_stack == NULL) {
2302 Py_DECREF(it);
2303 PyErr_NoMemory();
2304 return NULL;
2305 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002306 it->parent_stack_used = 0;
2307 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002308
Eli Bendersky64d11e62012-06-15 07:42:50 +03002309 return (PyObject *)it;
2310}
2311
2312
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002313/* ==================================================================== */
2314/* the tree builder type */
2315
/* State of a tree builder.  Every PyObject* field below is an owned
 * reference that treebuilder_gc_clear() releases. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* callable used to create nodes; NULL or
                                  Py_None selects the built-in element */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;

/* True only for objects whose type is exactly TreeBuilder_Type
   (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002340
2341/* -------------------------------------------------------------------- */
2342/* constructor and destructor */
2343
Eli Bendersky58d548d2012-05-29 15:45:16 +03002344static PyObject *
2345treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002346{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002347 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2348 if (t != NULL) {
2349 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350
Eli Bendersky58d548d2012-05-29 15:45:16 +03002351 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002352 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002354 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355
Eli Bendersky58d548d2012-05-29 15:45:16 +03002356 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002357 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002358 t->stack = PyList_New(20);
2359 if (!t->stack) {
2360 Py_DECREF(t->this);
2361 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002362 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002363 return NULL;
2364 }
2365 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002366
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002367 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002368 t->start_event_obj = t->end_event_obj = NULL;
2369 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2370 }
2371 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002372}
2373
Serhiy Storchakacb985562015-05-04 15:32:48 +03002374/*[clinic input]
2375_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002376
Serhiy Storchakacb985562015-05-04 15:32:48 +03002377 element_factory: object = NULL
2378
2379[clinic start generated code]*/
2380
2381static int
2382_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2383 PyObject *element_factory)
2384/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2385{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386 if (element_factory) {
2387 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002388 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002389 }
2390
Eli Bendersky58d548d2012-05-29 15:45:16 +03002391 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392}
2393
Eli Bendersky48d358b2012-05-30 17:57:50 +03002394static int
2395treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2396{
2397 Py_VISIT(self->root);
2398 Py_VISIT(self->this);
2399 Py_VISIT(self->last);
2400 Py_VISIT(self->data);
2401 Py_VISIT(self->stack);
2402 Py_VISIT(self->element_factory);
2403 return 0;
2404}
2405
2406static int
2407treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002408{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002409 Py_CLEAR(self->end_ns_event_obj);
2410 Py_CLEAR(self->start_ns_event_obj);
2411 Py_CLEAR(self->end_event_obj);
2412 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002413 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002414 Py_CLEAR(self->stack);
2415 Py_CLEAR(self->data);
2416 Py_CLEAR(self->last);
2417 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002418 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002419 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002420 return 0;
2421}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002422
Eli Bendersky48d358b2012-05-30 17:57:50 +03002423static void
2424treebuilder_dealloc(TreeBuilderObject *self)
2425{
2426 PyObject_GC_UnTrack(self);
2427 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002428 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002429}
2430
2431/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002432/* helpers for handling of arbitrary element-like objects */
2433
2434static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002435treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002436 PyObject **dest, _Py_Identifier *name)
2437{
2438 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002439 PyObject *tmp = JOIN_OBJ(*dest);
2440 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2441 *data = NULL;
2442 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002443 return 0;
2444 }
2445 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002446 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002447 int r;
2448 if (joined == NULL)
2449 return -1;
2450 r = _PyObject_SetAttrId(element, name, joined);
2451 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002452 if (r < 0)
2453 return -1;
2454 Py_CLEAR(*data);
2455 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002456 }
2457}
2458
Serhiy Storchaka576def02017-03-30 09:47:31 +03002459LOCAL(int)
2460treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002461{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002462 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002463
Serhiy Storchaka576def02017-03-30 09:47:31 +03002464 if (!self->data) {
2465 return 0;
2466 }
2467
2468 if (self->this == element) {
2469 _Py_IDENTIFIER(text);
2470 return treebuilder_set_element_text_or_tail(
2471 element, &self->data,
2472 &((ElementObject *) element)->text, &PyId_text);
2473 }
2474 else {
2475 _Py_IDENTIFIER(tail);
2476 return treebuilder_set_element_text_or_tail(
2477 element, &self->data,
2478 &((ElementObject *) element)->tail, &PyId_tail);
2479 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002480}
2481
2482static int
2483treebuilder_add_subelement(PyObject *element, PyObject *child)
2484{
2485 _Py_IDENTIFIER(append);
2486 if (Element_CheckExact(element)) {
2487 ElementObject *elem = (ElementObject *) element;
2488 return element_add_subelement(elem, child);
2489 }
2490 else {
2491 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002492 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002493 if (res == NULL)
2494 return -1;
2495 Py_DECREF(res);
2496 return 0;
2497 }
2498}
2499
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002500LOCAL(int)
2501treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2502 PyObject *node)
2503{
2504 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002505 PyObject *res;
2506 PyObject *event = PyTuple_Pack(2, action, node);
2507 if (event == NULL)
2508 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002509 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002510 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002511 if (res == NULL)
2512 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002513 Py_DECREF(res);
2514 }
2515 return 0;
2516}
2517
Antoine Pitrouee329312012-10-04 19:53:29 +02002518/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519/* handlers */
2520
2521LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2523 PyObject* attrib)
2524{
2525 PyObject* node;
2526 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002527 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528
Serhiy Storchaka576def02017-03-30 09:47:31 +03002529 if (treebuilder_flush_data(self) < 0) {
2530 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531 }
2532
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002533 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002534 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002535 } else if (attrib == Py_None) {
2536 attrib = PyDict_New();
2537 if (!attrib)
2538 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002539 node = PyObject_CallFunctionObjArgs(self->element_factory,
2540 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002541 Py_DECREF(attrib);
2542 }
2543 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002544 node = PyObject_CallFunctionObjArgs(self->element_factory,
2545 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002546 }
2547 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002549 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550
Antoine Pitrouee329312012-10-04 19:53:29 +02002551 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552
2553 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002554 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002555 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556 } else {
2557 if (self->root) {
2558 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002559 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 "multiple elements on top level"
2561 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002562 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 }
2564 Py_INCREF(node);
2565 self->root = node;
2566 }
2567
2568 if (self->index < PyList_GET_SIZE(self->stack)) {
2569 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002570 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 Py_INCREF(this);
2572 } else {
2573 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002574 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 }
2576 self->index++;
2577
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002579 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002581 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002583 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2584 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002585
2586 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002587
2588 error:
2589 Py_DECREF(node);
2590 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591}
2592
2593LOCAL(PyObject*)
2594treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2595{
2596 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002597 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002598 /* ignore calls to data before the first call to start */
2599 Py_RETURN_NONE;
2600 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 /* store the first item as is */
2602 Py_INCREF(data); self->data = data;
2603 } else {
2604 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002605 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2606 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002607 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608 /* expat often generates single character data sections; handle
2609 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002610 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2611 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002612 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002613 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002614 } else if (PyList_CheckExact(self->data)) {
2615 if (PyList_Append(self->data, data) < 0)
2616 return NULL;
2617 } else {
2618 PyObject* list = PyList_New(2);
2619 if (!list)
2620 return NULL;
2621 PyList_SET_ITEM(list, 0, self->data);
2622 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2623 self->data = list;
2624 }
2625 }
2626
2627 Py_RETURN_NONE;
2628}
2629
2630LOCAL(PyObject*)
2631treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2632{
2633 PyObject* item;
2634
Serhiy Storchaka576def02017-03-30 09:47:31 +03002635 if (treebuilder_flush_data(self) < 0) {
2636 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002637 }
2638
2639 if (self->index == 0) {
2640 PyErr_SetString(
2641 PyExc_IndexError,
2642 "pop from empty stack"
2643 );
2644 return NULL;
2645 }
2646
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002647 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002648 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002649 self->index--;
2650 self->this = PyList_GET_ITEM(self->stack, self->index);
2651 Py_INCREF(self->this);
2652 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002654 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2655 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656
2657 Py_INCREF(self->last);
2658 return (PyObject*) self->last;
2659}
2660
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661/* -------------------------------------------------------------------- */
2662/* methods (in alphabetical order) */
2663
Serhiy Storchakacb985562015-05-04 15:32:48 +03002664/*[clinic input]
2665_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666
Serhiy Storchakacb985562015-05-04 15:32:48 +03002667 data: object
2668 /
2669
2670[clinic start generated code]*/
2671
2672static PyObject *
2673_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2674/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2675{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676 return treebuilder_handle_data(self, data);
2677}
2678
Serhiy Storchakacb985562015-05-04 15:32:48 +03002679/*[clinic input]
2680_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681
Serhiy Storchakacb985562015-05-04 15:32:48 +03002682 tag: object
2683 /
2684
2685[clinic start generated code]*/
2686
2687static PyObject *
2688_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2689/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2690{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691 return treebuilder_handle_end(self, tag);
2692}
2693
2694LOCAL(PyObject*)
2695treebuilder_done(TreeBuilderObject* self)
2696{
2697 PyObject* res;
2698
2699 /* FIXME: check stack size? */
2700
2701 if (self->root)
2702 res = self->root;
2703 else
2704 res = Py_None;
2705
2706 Py_INCREF(res);
2707 return res;
2708}
2709
Serhiy Storchakacb985562015-05-04 15:32:48 +03002710/*[clinic input]
2711_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712
Serhiy Storchakacb985562015-05-04 15:32:48 +03002713[clinic start generated code]*/
2714
2715static PyObject *
2716_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2717/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2718{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719 return treebuilder_done(self);
2720}
2721
Serhiy Storchakacb985562015-05-04 15:32:48 +03002722/*[clinic input]
2723_elementtree.TreeBuilder.start
2724
2725 tag: object
2726 attrs: object = None
2727 /
2728
2729[clinic start generated code]*/
2730
2731static PyObject *
2732_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2733 PyObject *attrs)
2734/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002736 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737}
2738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739/* ==================================================================== */
2740/* the expat interface */
2741
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002744
2745/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2746 * cached globally without being in per-module state.
2747 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002748static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750
Eli Bendersky52467b12012-06-01 07:13:08 +03002751static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2752 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2753
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754typedef struct {
2755 PyObject_HEAD
2756
2757 XML_Parser parser;
2758
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002759 PyObject *target;
2760 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002762 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002764 PyObject *handle_start;
2765 PyObject *handle_data;
2766 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002768 PyObject *handle_comment;
2769 PyObject *handle_pi;
2770 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002772 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002773
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774} XMLParserObject;
2775
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002776static PyObject*
Victor Stinner0c4a8282017-01-17 02:21:47 +01002777_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002778static PyObject *
2779_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2780 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002781
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782/* helpers */
2783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784LOCAL(PyObject*)
2785makeuniversal(XMLParserObject* self, const char* string)
2786{
2787 /* convert a UTF-8 tag/attribute name from the expat parser
2788 to a universal name string */
2789
Antoine Pitrouc1948842012-10-01 23:40:37 +02002790 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791 PyObject* key;
2792 PyObject* value;
2793
2794 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002795 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 if (!key)
2797 return NULL;
2798
2799 value = PyDict_GetItem(self->names, key);
2800
2801 if (value) {
2802 Py_INCREF(value);
2803 } else {
2804 /* new name. convert to universal name, and decode as
2805 necessary */
2806
2807 PyObject* tag;
2808 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002809 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810
2811 /* look for namespace separator */
2812 for (i = 0; i < size; i++)
2813 if (string[i] == '}')
2814 break;
2815 if (i != size) {
2816 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002817 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002818 if (tag == NULL) {
2819 Py_DECREF(key);
2820 return NULL;
2821 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002822 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002823 p[0] = '{';
2824 memcpy(p+1, string, size);
2825 size++;
2826 } else {
2827 /* plain name; use key as tag */
2828 Py_INCREF(key);
2829 tag = key;
2830 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002831
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002832 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002833 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002834 value = PyUnicode_DecodeUTF8(p, size, "strict");
2835 Py_DECREF(tag);
2836 if (!value) {
2837 Py_DECREF(key);
2838 return NULL;
2839 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002840
2841 /* add to names dictionary */
2842 if (PyDict_SetItem(self->names, key, value) < 0) {
2843 Py_DECREF(key);
2844 Py_DECREF(value);
2845 return NULL;
2846 }
2847 }
2848
2849 Py_DECREF(key);
2850 return value;
2851}
2852
Eli Bendersky5b77d812012-03-16 08:20:05 +02002853/* Set the ParseError exception with the given parameters.
2854 * If message is not NULL, it's used as the error string. Otherwise, the
2855 * message string is the default for the given error_code.
2856*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002857static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002858expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2859 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002861 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002862 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002863
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002864 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002865 message ? message : EXPAT(ErrorString)(error_code),
2866 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002867 if (errmsg == NULL)
2868 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002869
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002870 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002871 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002872 if (!error)
2873 return;
2874
Eli Bendersky5b77d812012-03-16 08:20:05 +02002875 /* Add code and position attributes */
2876 code = PyLong_FromLong((long)error_code);
2877 if (!code) {
2878 Py_DECREF(error);
2879 return;
2880 }
2881 if (PyObject_SetAttrString(error, "code", code) == -1) {
2882 Py_DECREF(error);
2883 Py_DECREF(code);
2884 return;
2885 }
2886 Py_DECREF(code);
2887
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002888 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002889 if (!position) {
2890 Py_DECREF(error);
2891 return;
2892 }
2893 if (PyObject_SetAttrString(error, "position", position) == -1) {
2894 Py_DECREF(error);
2895 Py_DECREF(position);
2896 return;
2897 }
2898 Py_DECREF(position);
2899
Eli Bendersky532d03e2013-08-10 08:00:39 -07002900 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002901 Py_DECREF(error);
2902}
2903
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904/* -------------------------------------------------------------------- */
2905/* handlers */
2906
2907static void
2908expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2909 int data_len)
2910{
2911 PyObject* key;
2912 PyObject* value;
2913 PyObject* res;
2914
2915 if (data_len < 2 || data_in[0] != '&')
2916 return;
2917
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002918 if (PyErr_Occurred())
2919 return;
2920
Neal Norwitz0269b912007-08-08 06:56:02 +00002921 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002922 if (!key)
2923 return;
2924
2925 value = PyDict_GetItem(self->entity, key);
2926
2927 if (value) {
2928 if (TreeBuilder_CheckExact(self->target))
2929 res = treebuilder_handle_data(
2930 (TreeBuilderObject*) self->target, value
2931 );
2932 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002933 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934 else
2935 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002937 } else if (!PyErr_Occurred()) {
2938 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002939 char message[128] = "undefined entity ";
2940 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002941 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002942 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002944 EXPAT(GetErrorColumnNumber)(self->parser),
2945 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 );
2947 }
2948
2949 Py_DECREF(key);
2950}
2951
2952static void
2953expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2954 const XML_Char **attrib_in)
2955{
2956 PyObject* res;
2957 PyObject* tag;
2958 PyObject* attrib;
2959 int ok;
2960
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002961 if (PyErr_Occurred())
2962 return;
2963
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964 /* tag name */
2965 tag = makeuniversal(self, tag_in);
2966 if (!tag)
2967 return; /* parser will look for errors */
2968
2969 /* attributes */
2970 if (attrib_in[0]) {
2971 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002972 if (!attrib) {
2973 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002975 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 while (attrib_in[0] && attrib_in[1]) {
2977 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002978 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 if (!key || !value) {
2980 Py_XDECREF(value);
2981 Py_XDECREF(key);
2982 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002983 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002984 return;
2985 }
2986 ok = PyDict_SetItem(attrib, key, value);
2987 Py_DECREF(value);
2988 Py_DECREF(key);
2989 if (ok < 0) {
2990 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002991 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002992 return;
2993 }
2994 attrib_in += 2;
2995 }
2996 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002997 Py_INCREF(Py_None);
2998 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002999 }
3000
3001 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002 /* shortcut */
3003 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3004 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003005 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003006 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02003007 if (attrib == Py_None) {
3008 Py_DECREF(attrib);
3009 attrib = PyDict_New();
3010 if (!attrib) {
3011 Py_DECREF(tag);
3012 return;
3013 }
3014 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003015 res = PyObject_CallFunctionObjArgs(self->handle_start,
3016 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003017 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 res = NULL;
3019
3020 Py_DECREF(tag);
3021 Py_DECREF(attrib);
3022
3023 Py_XDECREF(res);
3024}
3025
3026static void
3027expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3028 int data_len)
3029{
3030 PyObject* data;
3031 PyObject* res;
3032
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003033 if (PyErr_Occurred())
3034 return;
3035
Neal Norwitz0269b912007-08-08 06:56:02 +00003036 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003037 if (!data)
3038 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039
3040 if (TreeBuilder_CheckExact(self->target))
3041 /* shortcut */
3042 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3043 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003044 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045 else
3046 res = NULL;
3047
3048 Py_DECREF(data);
3049
3050 Py_XDECREF(res);
3051}
3052
3053static void
3054expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3055{
3056 PyObject* tag;
3057 PyObject* res = NULL;
3058
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003059 if (PyErr_Occurred())
3060 return;
3061
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062 if (TreeBuilder_CheckExact(self->target))
3063 /* shortcut */
3064 /* the standard tree builder doesn't look at the end tag */
3065 res = treebuilder_handle_end(
3066 (TreeBuilderObject*) self->target, Py_None
3067 );
3068 else if (self->handle_end) {
3069 tag = makeuniversal(self, tag_in);
3070 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003071 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072 Py_DECREF(tag);
3073 }
3074 }
3075
3076 Py_XDECREF(res);
3077}
3078
3079static void
3080expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3081 const XML_Char *uri)
3082{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003083 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3084 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003085
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003086 if (PyErr_Occurred())
3087 return;
3088
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003089 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003090 return;
3091
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003092 if (!uri)
3093 uri = "";
3094 if (!prefix)
3095 prefix = "";
3096
3097 parcel = Py_BuildValue("ss", prefix, uri);
3098 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003099 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003100 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3101 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102}
3103
3104static void
3105expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3106{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003107 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3108
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003109 if (PyErr_Occurred())
3110 return;
3111
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003112 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003113 return;
3114
3115 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116}
3117
3118static void
3119expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3120{
3121 PyObject* comment;
3122 PyObject* res;
3123
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003124 if (PyErr_Occurred())
3125 return;
3126
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003127 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003128 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003129 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003130 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3131 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003132 Py_XDECREF(res);
3133 Py_DECREF(comment);
3134 }
3135 }
3136}
3137
Eli Bendersky45839902013-01-13 05:14:47 -08003138static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003139expat_start_doctype_handler(XMLParserObject *self,
3140 const XML_Char *doctype_name,
3141 const XML_Char *sysid,
3142 const XML_Char *pubid,
3143 int has_internal_subset)
3144{
3145 PyObject *self_pyobj = (PyObject *)self;
3146 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3147 PyObject *parser_doctype = NULL;
3148 PyObject *res = NULL;
3149
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003150 if (PyErr_Occurred())
3151 return;
3152
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003153 doctype_name_obj = makeuniversal(self, doctype_name);
3154 if (!doctype_name_obj)
3155 return;
3156
3157 if (sysid) {
3158 sysid_obj = makeuniversal(self, sysid);
3159 if (!sysid_obj) {
3160 Py_DECREF(doctype_name_obj);
3161 return;
3162 }
3163 } else {
3164 Py_INCREF(Py_None);
3165 sysid_obj = Py_None;
3166 }
3167
3168 if (pubid) {
3169 pubid_obj = makeuniversal(self, pubid);
3170 if (!pubid_obj) {
3171 Py_DECREF(doctype_name_obj);
3172 Py_DECREF(sysid_obj);
3173 return;
3174 }
3175 } else {
3176 Py_INCREF(Py_None);
3177 pubid_obj = Py_None;
3178 }
3179
3180 /* If the target has a handler for doctype, call it. */
3181 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003182 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3183 doctype_name_obj, pubid_obj,
3184 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003185 Py_CLEAR(res);
3186 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003187 else {
3188 /* Now see if the parser itself has a doctype method. If yes and it's
3189 * a custom method, call it but warn about deprecation. If it's only
3190 * the vanilla XMLParser method, do nothing.
3191 */
3192 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3193 if (parser_doctype &&
3194 !(PyCFunction_Check(parser_doctype) &&
3195 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3196 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003197 (PyCFunction) _elementtree_XMLParser_doctype)) {
3198 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3199 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003200 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003201 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003202 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003203 res = PyObject_CallFunctionObjArgs(parser_doctype,
3204 doctype_name_obj, pubid_obj,
3205 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003206 Py_CLEAR(res);
3207 }
3208 }
3209
3210clear:
3211 Py_XDECREF(parser_doctype);
3212 Py_DECREF(doctype_name_obj);
3213 Py_DECREF(pubid_obj);
3214 Py_DECREF(sysid_obj);
3215}
3216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217static void
3218expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3219 const XML_Char* data_in)
3220{
3221 PyObject* target;
3222 PyObject* data;
3223 PyObject* res;
3224
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003225 if (PyErr_Occurred())
3226 return;
3227
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003229 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3230 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003232 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3233 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 Py_XDECREF(res);
3235 Py_DECREF(data);
3236 Py_DECREF(target);
3237 } else {
3238 Py_XDECREF(data);
3239 Py_XDECREF(target);
3240 }
3241 }
3242}
3243
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245
Eli Bendersky52467b12012-06-01 07:13:08 +03003246static PyObject *
3247xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248{
Eli Bendersky52467b12012-06-01 07:13:08 +03003249 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3250 if (self) {
3251 self->parser = NULL;
3252 self->target = self->entity = self->names = NULL;
3253 self->handle_start = self->handle_data = self->handle_end = NULL;
3254 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003255 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003257 return (PyObject *)self;
3258}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259
Serhiy Storchakacb985562015-05-04 15:32:48 +03003260/*[clinic input]
3261_elementtree.XMLParser.__init__
3262
3263 html: object = NULL
3264 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003265 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266
3267[clinic start generated code]*/
3268
Eli Bendersky52467b12012-06-01 07:13:08 +03003269static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3271 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003272/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003273{
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003274 if (html != NULL) {
3275 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3276 "The html argument of XMLParser() is deprecated",
3277 1) < 0) {
3278 return -1;
3279 }
3280 }
3281
Serhiy Storchakacb985562015-05-04 15:32:48 +03003282 self->entity = PyDict_New();
3283 if (!self->entity)
3284 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285
Serhiy Storchakacb985562015-05-04 15:32:48 +03003286 self->names = PyDict_New();
3287 if (!self->names) {
3288 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003291
Serhiy Storchakacb985562015-05-04 15:32:48 +03003292 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3293 if (!self->parser) {
3294 Py_CLEAR(self->entity);
3295 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003297 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 }
3299
Eli Bendersky52467b12012-06-01 07:13:08 +03003300 if (target) {
3301 Py_INCREF(target);
3302 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003303 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003305 Py_CLEAR(self->entity);
3306 Py_CLEAR(self->names);
3307 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003308 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003310 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003311 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003312
Serhiy Storchakacb985562015-05-04 15:32:48 +03003313 self->handle_start = PyObject_GetAttrString(target, "start");
3314 self->handle_data = PyObject_GetAttrString(target, "data");
3315 self->handle_end = PyObject_GetAttrString(target, "end");
3316 self->handle_comment = PyObject_GetAttrString(target, "comment");
3317 self->handle_pi = PyObject_GetAttrString(target, "pi");
3318 self->handle_close = PyObject_GetAttrString(target, "close");
3319 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320
3321 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003322
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003324 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003325 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003326 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327 (XML_StartElementHandler) expat_start_handler,
3328 (XML_EndElementHandler) expat_end_handler
3329 );
3330 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003331 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332 (XML_DefaultHandler) expat_default_handler
3333 );
3334 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003335 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336 (XML_CharacterDataHandler) expat_data_handler
3337 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003338 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003340 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003341 (XML_CommentHandler) expat_comment_handler
3342 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003343 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003345 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346 (XML_ProcessingInstructionHandler) expat_pi_handler
3347 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003348 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003349 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003350 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3351 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003353 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003354 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003355 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003356
Eli Bendersky52467b12012-06-01 07:13:08 +03003357 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358}
3359
Eli Bendersky52467b12012-06-01 07:13:08 +03003360static int
3361xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3362{
3363 Py_VISIT(self->handle_close);
3364 Py_VISIT(self->handle_pi);
3365 Py_VISIT(self->handle_comment);
3366 Py_VISIT(self->handle_end);
3367 Py_VISIT(self->handle_data);
3368 Py_VISIT(self->handle_start);
3369
3370 Py_VISIT(self->target);
3371 Py_VISIT(self->entity);
3372 Py_VISIT(self->names);
3373
3374 return 0;
3375}
3376
3377static int
3378xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379{
3380 EXPAT(ParserFree)(self->parser);
3381
Antoine Pitrouc1948842012-10-01 23:40:37 +02003382 Py_CLEAR(self->handle_close);
3383 Py_CLEAR(self->handle_pi);
3384 Py_CLEAR(self->handle_comment);
3385 Py_CLEAR(self->handle_end);
3386 Py_CLEAR(self->handle_data);
3387 Py_CLEAR(self->handle_start);
3388 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389
Antoine Pitrouc1948842012-10-01 23:40:37 +02003390 Py_CLEAR(self->target);
3391 Py_CLEAR(self->entity);
3392 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003393
Eli Bendersky52467b12012-06-01 07:13:08 +03003394 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003395}
3396
Eli Bendersky52467b12012-06-01 07:13:08 +03003397static void
3398xmlparser_dealloc(XMLParserObject* self)
3399{
3400 PyObject_GC_UnTrack(self);
3401 xmlparser_gc_clear(self);
3402 Py_TYPE(self)->tp_free((PyObject *)self);
3403}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404
3405LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003406expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407{
3408 int ok;
3409
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003410 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3412
3413 if (PyErr_Occurred())
3414 return NULL;
3415
3416 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003417 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003418 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003420 EXPAT(GetErrorColumnNumber)(self->parser),
3421 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422 );
3423 return NULL;
3424 }
3425
3426 Py_RETURN_NONE;
3427}
3428
Serhiy Storchakacb985562015-05-04 15:32:48 +03003429/*[clinic input]
3430_elementtree.XMLParser.close
3431
3432[clinic start generated code]*/
3433
3434static PyObject *
3435_elementtree_XMLParser_close_impl(XMLParserObject *self)
3436/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003437{
3438 /* end feeding data to parser */
3439
3440 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003441 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003442 if (!res)
3443 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003444
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003445 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003446 Py_DECREF(res);
3447 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003448 }
3449 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003450 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003451 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003452 }
3453 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003454 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003455 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456}
3457
Serhiy Storchakacb985562015-05-04 15:32:48 +03003458/*[clinic input]
3459_elementtree.XMLParser.feed
3460
3461 data: object
3462 /
3463
3464[clinic start generated code]*/
3465
3466static PyObject *
3467_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3468/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469{
3470 /* feed data to parser */
3471
Serhiy Storchakacb985562015-05-04 15:32:48 +03003472 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003473 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003474 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3475 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003476 return NULL;
3477 if (data_len > INT_MAX) {
3478 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3479 return NULL;
3480 }
3481 /* Explicitly set UTF-8 encoding. Return code ignored. */
3482 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003483 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003484 }
3485 else {
3486 Py_buffer view;
3487 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003488 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003489 return NULL;
3490 if (view.len > INT_MAX) {
3491 PyBuffer_Release(&view);
3492 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3493 return NULL;
3494 }
3495 res = expat_parse(self, view.buf, (int)view.len, 0);
3496 PyBuffer_Release(&view);
3497 return res;
3498 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003499}
3500
Serhiy Storchakacb985562015-05-04 15:32:48 +03003501/*[clinic input]
3502_elementtree.XMLParser._parse_whole
3503
3504 file: object
3505 /
3506
3507[clinic start generated code]*/
3508
3509static PyObject *
3510_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3511/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003512{
Eli Benderskya3699232013-05-19 18:47:23 -07003513 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003514 PyObject* reader;
3515 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003516 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517 PyObject* res;
3518
Serhiy Storchakacb985562015-05-04 15:32:48 +03003519 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003520 if (!reader)
3521 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003522
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523 /* read from open file object */
3524 for (;;) {
3525
3526 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3527
3528 if (!buffer) {
3529 /* read failed (e.g. due to KeyboardInterrupt) */
3530 Py_DECREF(reader);
3531 return NULL;
3532 }
3533
Eli Benderskyf996e772012-03-16 05:53:30 +02003534 if (PyUnicode_CheckExact(buffer)) {
3535 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003536 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003537 Py_DECREF(buffer);
3538 break;
3539 }
3540 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003541 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003542 if (!temp) {
3543 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003544 Py_DECREF(reader);
3545 return NULL;
3546 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003547 buffer = temp;
3548 }
3549 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550 Py_DECREF(buffer);
3551 break;
3552 }
3553
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003554 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3555 Py_DECREF(buffer);
3556 Py_DECREF(reader);
3557 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3558 return NULL;
3559 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003560 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003561 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003562 );
3563
3564 Py_DECREF(buffer);
3565
3566 if (!res) {
3567 Py_DECREF(reader);
3568 return NULL;
3569 }
3570 Py_DECREF(res);
3571
3572 }
3573
3574 Py_DECREF(reader);
3575
3576 res = expat_parse(self, "", 0, 1);
3577
3578 if (res && TreeBuilder_CheckExact(self->target)) {
3579 Py_DECREF(res);
3580 return treebuilder_done((TreeBuilderObject*) self->target);
3581 }
3582
3583 return res;
3584}
3585
Serhiy Storchakacb985562015-05-04 15:32:48 +03003586/*[clinic input]
3587_elementtree.XMLParser.doctype
3588
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003589 name: object
3590 pubid: object
3591 system: object
3592 /
3593
Serhiy Storchakacb985562015-05-04 15:32:48 +03003594[clinic start generated code]*/
3595
3596static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003597_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3598 PyObject *pubid, PyObject *system)
3599/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003600{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003601 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3602 "This method of XMLParser is deprecated. Define"
3603 " doctype() method on the TreeBuilder target.",
3604 1) < 0) {
3605 return NULL;
3606 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003607 Py_RETURN_NONE;
3608}
3609
Serhiy Storchakacb985562015-05-04 15:32:48 +03003610/*[clinic input]
3611_elementtree.XMLParser._setevents
3612
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003613 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003614 events_to_report: object = None
3615 /
3616
3617[clinic start generated code]*/
3618
3619static PyObject *
3620_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3621 PyObject *events_queue,
3622 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003623/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003624{
3625 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003626 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003627 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003628 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629
3630 if (!TreeBuilder_CheckExact(self->target)) {
3631 PyErr_SetString(
3632 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003633 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003634 "targets"
3635 );
3636 return NULL;
3637 }
3638
3639 target = (TreeBuilderObject*) self->target;
3640
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003641 events_append = PyObject_GetAttrString(events_queue, "append");
3642 if (events_append == NULL)
3643 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003644 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003645
3646 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003647 Py_CLEAR(target->start_event_obj);
3648 Py_CLEAR(target->end_event_obj);
3649 Py_CLEAR(target->start_ns_event_obj);
3650 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003651
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003652 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003654 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655 Py_RETURN_NONE;
3656 }
3657
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003658 if (!(events_seq = PySequence_Fast(events_to_report,
3659 "events must be a sequence"))) {
3660 return NULL;
3661 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003663 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003664 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003665 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003666 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003667 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003668 } else if (PyBytes_Check(event_name_obj)) {
3669 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003670 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003671 if (event_name == NULL) {
3672 Py_DECREF(events_seq);
3673 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3674 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003675 }
3676
3677 Py_INCREF(event_name_obj);
3678 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003679 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003680 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003681 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003682 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003683 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684 EXPAT(SetNamespaceDeclHandler)(
3685 self->parser,
3686 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3687 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3688 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003689 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003690 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003691 EXPAT(SetNamespaceDeclHandler)(
3692 self->parser,
3693 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3694 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3695 );
3696 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003697 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003698 Py_DECREF(events_seq);
3699 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700 return NULL;
3701 }
3702 }
3703
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003704 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003705 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003706}
3707
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003708static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003709xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003710{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003711 if (PyUnicode_Check(nameobj)) {
3712 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003713 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003714 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003715 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003716 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003717 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003718 return PyUnicode_FromFormat(
3719 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003720 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003721 }
3722 else
3723 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003724
Alexander Belopolskye239d232010-12-08 23:31:48 +00003725 Py_INCREF(res);
3726 return res;
3727 }
3728 generic:
3729 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003730}
3731
Serhiy Storchakacb985562015-05-04 15:32:48 +03003732#include "clinic/_elementtree.c.h"
3733
3734static PyMethodDef element_methods[] = {
3735
3736 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3737
3738 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3739 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3740
3741 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3742 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3743 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3744
3745 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3746 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3747 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3748 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3749
3750 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3751 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3752 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3753
Serhiy Storchaka762ec972017-03-30 18:12:06 +03003754 _ELEMENTTREE_ELEMENT_GETITERATOR_METHODDEF
Serhiy Storchakacb985562015-05-04 15:32:48 +03003755 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3756
3757 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3758 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3759
3760 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3761
3762 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3763 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3764 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3765 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3766 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3767
3768 {NULL, NULL}
3769};
3770
3771static PyMappingMethods element_as_mapping = {
3772 (lenfunc) element_length,
3773 (binaryfunc) element_subscr,
3774 (objobjargproc) element_ass_subscr,
3775};
3776
Serhiy Storchakadde08152015-11-25 15:28:13 +02003777static PyGetSetDef element_getsetlist[] = {
3778 {"tag",
3779 (getter)element_tag_getter,
3780 (setter)element_tag_setter,
3781 "A string identifying what kind of data this element represents"},
3782 {"text",
3783 (getter)element_text_getter,
3784 (setter)element_text_setter,
3785 "A string of text directly after the start tag, or None"},
3786 {"tail",
3787 (getter)element_tail_getter,
3788 (setter)element_tail_setter,
3789 "A string of text directly after the end tag, or None"},
3790 {"attrib",
3791 (getter)element_attrib_getter,
3792 (setter)element_attrib_setter,
3793 "A dictionary containing the element's attributes"},
3794 {NULL},
3795};
3796
Serhiy Storchakacb985562015-05-04 15:32:48 +03003797static PyTypeObject Element_Type = {
3798 PyVarObject_HEAD_INIT(NULL, 0)
3799 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3800 /* methods */
3801 (destructor)element_dealloc, /* tp_dealloc */
3802 0, /* tp_print */
3803 0, /* tp_getattr */
3804 0, /* tp_setattr */
3805 0, /* tp_reserved */
3806 (reprfunc)element_repr, /* tp_repr */
3807 0, /* tp_as_number */
3808 &element_as_sequence, /* tp_as_sequence */
3809 &element_as_mapping, /* tp_as_mapping */
3810 0, /* tp_hash */
3811 0, /* tp_call */
3812 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003813 PyObject_GenericGetAttr, /* tp_getattro */
3814 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003815 0, /* tp_as_buffer */
3816 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3817 /* tp_flags */
3818 0, /* tp_doc */
3819 (traverseproc)element_gc_traverse, /* tp_traverse */
3820 (inquiry)element_gc_clear, /* tp_clear */
3821 0, /* tp_richcompare */
3822 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3823 0, /* tp_iter */
3824 0, /* tp_iternext */
3825 element_methods, /* tp_methods */
3826 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003827 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003828 0, /* tp_base */
3829 0, /* tp_dict */
3830 0, /* tp_descr_get */
3831 0, /* tp_descr_set */
3832 0, /* tp_dictoffset */
3833 (initproc)element_init, /* tp_init */
3834 PyType_GenericAlloc, /* tp_alloc */
3835 element_new, /* tp_new */
3836 0, /* tp_free */
3837};
3838
3839static PyMethodDef treebuilder_methods[] = {
3840 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3841 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3842 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3843 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3844 {NULL, NULL}
3845};
3846
3847static PyTypeObject TreeBuilder_Type = {
3848 PyVarObject_HEAD_INIT(NULL, 0)
3849 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3850 /* methods */
3851 (destructor)treebuilder_dealloc, /* tp_dealloc */
3852 0, /* tp_print */
3853 0, /* tp_getattr */
3854 0, /* tp_setattr */
3855 0, /* tp_reserved */
3856 0, /* tp_repr */
3857 0, /* tp_as_number */
3858 0, /* tp_as_sequence */
3859 0, /* tp_as_mapping */
3860 0, /* tp_hash */
3861 0, /* tp_call */
3862 0, /* tp_str */
3863 0, /* tp_getattro */
3864 0, /* tp_setattro */
3865 0, /* tp_as_buffer */
3866 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3867 /* tp_flags */
3868 0, /* tp_doc */
3869 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3870 (inquiry)treebuilder_gc_clear, /* tp_clear */
3871 0, /* tp_richcompare */
3872 0, /* tp_weaklistoffset */
3873 0, /* tp_iter */
3874 0, /* tp_iternext */
3875 treebuilder_methods, /* tp_methods */
3876 0, /* tp_members */
3877 0, /* tp_getset */
3878 0, /* tp_base */
3879 0, /* tp_dict */
3880 0, /* tp_descr_get */
3881 0, /* tp_descr_set */
3882 0, /* tp_dictoffset */
3883 _elementtree_TreeBuilder___init__, /* tp_init */
3884 PyType_GenericAlloc, /* tp_alloc */
3885 treebuilder_new, /* tp_new */
3886 0, /* tp_free */
3887};
3888
3889static PyMethodDef xmlparser_methods[] = {
3890 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3891 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3892 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3893 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3894 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3895 {NULL, NULL}
3896};
3897
Neal Norwitz227b5332006-03-22 09:28:35 +00003898static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003899 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003900 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003901 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003902 (destructor)xmlparser_dealloc, /* tp_dealloc */
3903 0, /* tp_print */
3904 0, /* tp_getattr */
3905 0, /* tp_setattr */
3906 0, /* tp_reserved */
3907 0, /* tp_repr */
3908 0, /* tp_as_number */
3909 0, /* tp_as_sequence */
3910 0, /* tp_as_mapping */
3911 0, /* tp_hash */
3912 0, /* tp_call */
3913 0, /* tp_str */
3914 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3915 0, /* tp_setattro */
3916 0, /* tp_as_buffer */
3917 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3918 /* tp_flags */
3919 0, /* tp_doc */
3920 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3921 (inquiry)xmlparser_gc_clear, /* tp_clear */
3922 0, /* tp_richcompare */
3923 0, /* tp_weaklistoffset */
3924 0, /* tp_iter */
3925 0, /* tp_iternext */
3926 xmlparser_methods, /* tp_methods */
3927 0, /* tp_members */
3928 0, /* tp_getset */
3929 0, /* tp_base */
3930 0, /* tp_dict */
3931 0, /* tp_descr_get */
3932 0, /* tp_descr_set */
3933 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003934 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003935 PyType_GenericAlloc, /* tp_alloc */
3936 xmlparser_new, /* tp_new */
3937 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003938};
3939
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003940/* ==================================================================== */
3941/* python module interface */
3942
3943static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003944 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003945 {NULL, NULL}
3946};
3947
Martin v. Löwis1a214512008-06-11 05:26:20 +00003948
Eli Bendersky532d03e2013-08-10 08:00:39 -07003949static struct PyModuleDef elementtreemodule = {
3950 PyModuleDef_HEAD_INIT,
3951 "_elementtree",
3952 NULL,
3953 sizeof(elementtreestate),
3954 _functions,
3955 NULL,
3956 elementtree_traverse,
3957 elementtree_clear,
3958 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003959};
3960
Neal Norwitzf6657e62006-12-28 04:47:50 +00003961PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003962PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003963{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003964 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003965 elementtreestate *st;
3966
3967 m = PyState_FindModule(&elementtreemodule);
3968 if (m) {
3969 Py_INCREF(m);
3970 return m;
3971 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003973 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003974 if (PyType_Ready(&ElementIter_Type) < 0)
3975 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003976 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003977 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003978 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003979 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003980 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003981 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003982
Eli Bendersky532d03e2013-08-10 08:00:39 -07003983 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003984 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003985 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003986 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003987
Eli Bendersky828efde2012-04-05 05:40:58 +03003988 if (!(temp = PyImport_ImportModule("copy")))
3989 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003990 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003991 Py_XDECREF(temp);
3992
Eli Bendersky532d03e2013-08-10 08:00:39 -07003993 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003994 return NULL;
3995
Eli Bendersky20d41742012-06-01 09:48:37 +03003996 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003997 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3998 if (expat_capi) {
3999 /* check that it's usable */
4000 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02004001 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004002 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4003 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03004004 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03004005 PyErr_SetString(PyExc_ImportError,
4006 "pyexpat version is incompatible");
4007 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03004008 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03004009 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03004010 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03004011 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004012
Eli Bendersky532d03e2013-08-10 08:00:39 -07004013 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01004014 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004015 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07004016 Py_INCREF(st->parseerror_obj);
4017 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004018
Eli Bendersky092af1f2012-03-04 07:14:03 +02004019 Py_INCREF((PyObject *)&Element_Type);
4020 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
4021
Eli Bendersky58d548d2012-05-29 15:45:16 +03004022 Py_INCREF((PyObject *)&TreeBuilder_Type);
4023 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4024
Eli Bendersky52467b12012-06-01 07:13:08 +03004025 Py_INCREF((PyObject *)&XMLParser_Type);
4026 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004027
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004028 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004029}