blob: 36aa391609f5ea4cf9b05ba5e5d2080e6e19e504 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra block;
   an element with at most this many children needs no separate child
   buffer. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  With the first branch enabled, ALLOC and
   RELEASE maintain a running byte counter and print it together with the
   given comment; in the default configuration both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private function that
   returns `type`; under MSVC it additionally asks for inlining and the
   __fastcall calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The lowest bit of the text and tail pointers doubles as a 'join' flag.
   Every use of those fields as object pointers must therefore go through
   JOIN_OBJ.  See the comments in the ElementObject definition for more
   info.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - p's object pointer with the flag bit replaced */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
/* Per-module state of the given module object, which is assumed to be
 * _elementtree.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Per-module state of the _elementtree instance imported in the currently
 * running sub-interpreter, located through PyState_FindModule.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
Serhiy Storchaka576def02017-03-30 09:47:31 +0300134 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 return result;
144}
145
Eli Bendersky48d358b2012-05-30 17:57:50 +0300146/* Is the given object an empty dictionary?
147*/
148static int
149is_empty_dict(PyObject *obj)
150{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200151 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300152}
153
154
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200156/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157
158typedef struct {
159
160 /* attributes (a dictionary object), or None if no attributes */
161 PyObject* attrib;
162
163 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200164 Py_ssize_t length; /* actual number of items */
165 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166
167 /* this either points to _children or to a malloced buffer */
168 PyObject* *children;
169
170 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100171
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000172} ElementObjectExtra;
173
174typedef struct {
175 PyObject_HEAD
176
177 /* element tag (a string). */
178 PyObject* tag;
179
180 /* text before first child. note that this is a tagged pointer;
181 use JOIN_OBJ to get the object pointer. the join flag is used
182 to distinguish lists created by the tree builder from lists
183 assigned to the attribute by application code; the former
184 should be joined before being returned to the user, the latter
185 should be left intact. */
186 PyObject* text;
187
188 /* text after this element, in parent. note that this is a tagged
189 pointer; use JOIN_OBJ to get the object pointer. */
190 PyObject* tail;
191
192 ElementObjectExtra* extra;
193
Eli Benderskyebf37a22012-04-03 22:02:37 +0300194 PyObject *weakreflist; /* For tp_weaklistoffset */
195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObject;
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198
/* True only when op's type is exactly Element_Type (subclasses do not
   match). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200202/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203
204LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200205create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206{
207 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200208 if (!self->extra) {
209 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200211 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213 if (!attrib)
214 attrib = Py_None;
215
216 Py_INCREF(attrib);
217 self->extra->attrib = attrib;
218
219 self->extra->length = 0;
220 self->extra->allocated = STATIC_CHILDREN;
221 self->extra->children = self->extra->_children;
222
223 return 0;
224}
225
226LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200227dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000228{
Eli Bendersky08b85292012-04-04 15:55:07 +0300229 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200230 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300231
Eli Benderskyebf37a22012-04-03 22:02:37 +0300232 if (!self->extra)
233 return;
234
235 /* Avoid DECREFs calling into this code again (cycles, etc.)
236 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300237 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300238 self->extra = NULL;
239
240 Py_DECREF(myextra->attrib);
241
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 for (i = 0; i < myextra->length; i++)
243 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
Eli Benderskyebf37a22012-04-03 22:02:37 +0300245 if (myextra->children != myextra->_children)
246 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
Eli Benderskyebf37a22012-04-03 22:02:37 +0300248 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249}
250
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251/* Convenience internal function to create new Element objects with the given
252 * tag and attributes.
253*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
257 ElementObject* self;
258
Eli Bendersky0192ba32012-03-30 16:38:33 +0300259 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260 if (self == NULL)
261 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 self->extra = NULL;
263
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 Py_INCREF(tag);
265 self->tag = tag;
266
267 Py_INCREF(Py_None);
268 self->text = Py_None;
269
270 Py_INCREF(Py_None);
271 self->tail = Py_None;
272
Eli Benderskyebf37a22012-04-03 22:02:37 +0300273 self->weakreflist = NULL;
274
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200275 ALLOC(sizeof(ElementObject), "create element");
276 PyObject_GC_Track(self);
277
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200278 if (attrib != Py_None && !is_empty_dict(attrib)) {
279 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200280 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200281 return NULL;
282 }
283 }
284
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 return (PyObject*) self;
286}
287
Eli Bendersky092af1f2012-03-04 07:14:03 +0200288static PyObject *
289element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
290{
291 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
292 if (e != NULL) {
293 Py_INCREF(Py_None);
294 e->tag = Py_None;
295
296 Py_INCREF(Py_None);
297 e->text = Py_None;
298
299 Py_INCREF(Py_None);
300 e->tail = Py_None;
301
302 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300303 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200304 }
305 return (PyObject *)e;
306}
307
Eli Bendersky737b1732012-05-29 06:02:56 +0300308/* Helper function for extracting the attrib dictionary from a keywords dict.
309 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800310 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300311 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700312 *
313 * Return a dictionary with the content of kwds merged into the content of
314 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300315 */
316static PyObject*
317get_attrib_from_keywords(PyObject *kwds)
318{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700319 PyObject *attrib_str = PyUnicode_FromString("attrib");
320 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300321
322 if (attrib) {
323 /* If attrib was found in kwds, copy its value and remove it from
324 * kwds
325 */
326 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700327 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300328 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
329 Py_TYPE(attrib)->tp_name);
330 return NULL;
331 }
332 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700333 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 } else {
335 attrib = PyDict_New();
336 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700337
338 Py_DECREF(attrib_str);
339
340 /* attrib can be NULL if PyDict_New failed */
341 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200342 if (PyDict_Update(attrib, kwds) < 0)
343 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300344 return attrib;
345}
346
Serhiy Storchakacb985562015-05-04 15:32:48 +0300347/*[clinic input]
348module _elementtree
349class _elementtree.Element "ElementObject *" "&Element_Type"
350class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
351class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
352[clinic start generated code]*/
353/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
354
Eli Bendersky092af1f2012-03-04 07:14:03 +0200355static int
356element_init(PyObject *self, PyObject *args, PyObject *kwds)
357{
358 PyObject *tag;
359 PyObject *tmp;
360 PyObject *attrib = NULL;
361 ElementObject *self_elem;
362
363 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
364 return -1;
365
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 if (attrib) {
367 /* attrib passed as positional arg */
368 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200369 if (!attrib)
370 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300371 if (kwds) {
372 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200373 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 return -1;
375 }
376 }
377 } else if (kwds) {
378 /* have keywords args */
379 attrib = get_attrib_from_keywords(kwds);
380 if (!attrib)
381 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200382 }
383
384 self_elem = (ElementObject *)self;
385
Antoine Pitrouc1948842012-10-01 23:40:37 +0200386 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 return -1;
390 }
391 }
392
Eli Bendersky48d358b2012-05-30 17:57:50 +0300393 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395
396 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300398 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399
400 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_DECREF(JOIN_OBJ(tmp));
404
405 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_DECREF(JOIN_OBJ(tmp));
409
410 return 0;
411}
412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000413LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200414element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417 PyObject* *children;
418
419 /* make sure self->children can hold the given number of extra
420 elements. set an exception and return -1 if allocation failed */
421
Victor Stinner5f0af232013-07-11 23:01:36 +0200422 if (!self->extra) {
423 if (create_extra(self, NULL) < 0)
424 return -1;
425 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000426
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200427 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
429 if (size > self->extra->allocated) {
430 /* use Python 2.4's list growth strategy */
431 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100433 * which needs at least 4 bytes.
434 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 * be safe.
436 */
437 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200438 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
439 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000441 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100442 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 * false alarm always assume at least one child to be safe.
444 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 children = PyObject_Realloc(self->extra->children,
446 size * sizeof(PyObject*));
447 if (!children)
448 goto nomemory;
449 } else {
450 children = PyObject_Malloc(size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 /* copy existing children from static area to malloc buffer */
454 memcpy(children, self->extra->children,
455 self->extra->length * sizeof(PyObject*));
456 }
457 self->extra->children = children;
458 self->extra->allocated = size;
459 }
460
461 return 0;
462
463 nomemory:
464 PyErr_NoMemory();
465 return -1;
466}
467
468LOCAL(int)
469element_add_subelement(ElementObject* self, PyObject* element)
470{
471 /* add a child element to a parent */
472
473 if (element_resize(self, 1) < 0)
474 return -1;
475
476 Py_INCREF(element);
477 self->extra->children[self->extra->length] = element;
478
479 self->extra->length++;
480
481 return 0;
482}
483
484LOCAL(PyObject*)
485element_get_attrib(ElementObject* self)
486{
487 /* return borrowed reference to attrib dictionary */
488 /* note: this function assumes that the extra section exists */
489
490 PyObject* res = self->extra->attrib;
491
492 if (res == Py_None) {
493 /* create missing dictionary */
494 res = PyDict_New();
495 if (!res)
496 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200497 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000498 self->extra->attrib = res;
499 }
500
501 return res;
502}
503
504LOCAL(PyObject*)
505element_get_text(ElementObject* self)
506{
507 /* return borrowed reference to text attribute */
508
Serhiy Storchaka576def02017-03-30 09:47:31 +0300509 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000510
511 if (JOIN_GET(res)) {
512 res = JOIN_OBJ(res);
513 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300514 PyObject *tmp = list_join(res);
515 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000516 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300517 self->text = tmp;
518 Py_DECREF(res);
519 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
Serhiy Storchaka576def02017-03-30 09:47:31 +0300531 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +0300536 PyObject *tmp = list_join(res);
537 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 return NULL;
Serhiy Storchaka576def02017-03-30 09:47:31 +0300539 self->tail = tmp;
540 Py_DECREF(res);
541 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
613static int
614element_gc_clear(ElementObject *self)
615{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700617 _clear_joined_ptr(&self->text);
618 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619
620 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 return 0;
625}
626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627static void
628element_dealloc(ElementObject* self)
629{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200631 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300632
633 if (self->weakreflist != NULL)
634 PyObject_ClearWeakRefs((PyObject *) self);
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 /* element_gc_clear clears all references and deallocates extra
637 */
638 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639
640 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200641 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200642 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000643}
644
645/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000646
Serhiy Storchakacb985562015-05-04 15:32:48 +0300647/*[clinic input]
648_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000649
Serhiy Storchakacb985562015-05-04 15:32:48 +0300650 subelement: object(subclass_of='&Element_Type')
651 /
652
653[clinic start generated code]*/
654
655static PyObject *
656_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
657/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
658{
659 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660 return NULL;
661
662 Py_RETURN_NONE;
663}
664
Serhiy Storchakacb985562015-05-04 15:32:48 +0300665/*[clinic input]
666_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668[clinic start generated code]*/
669
670static PyObject *
671_elementtree_Element_clear_impl(ElementObject *self)
672/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
673{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300674 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->text));
678 self->text = Py_None;
679
680 Py_INCREF(Py_None);
681 Py_DECREF(JOIN_OBJ(self->tail));
682 self->tail = Py_None;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.__copy__
689
690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element___copy___impl(ElementObject *self)
694/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200696 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 ElementObject* element;
698
Eli Bendersky092af1f2012-03-04 07:14:03 +0200699 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800700 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 if (!element)
702 return NULL;
703
704 Py_DECREF(JOIN_OBJ(element->text));
705 element->text = self->text;
706 Py_INCREF(JOIN_OBJ(element->text));
707
708 Py_DECREF(JOIN_OBJ(element->tail));
709 element->tail = self->tail;
710 Py_INCREF(JOIN_OBJ(element->tail));
711
712 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000713 if (element_resize(element, self->extra->length) < 0) {
714 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000716 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717
718 for (i = 0; i < self->extra->length; i++) {
719 Py_INCREF(self->extra->children[i]);
720 element->extra->children[i] = self->extra->children[i];
721 }
722
723 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724 }
725
726 return (PyObject*) element;
727}
728
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200729/* Helper for a deep copy. */
730LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
731
Serhiy Storchakacb985562015-05-04 15:32:48 +0300732/*[clinic input]
733_elementtree.Element.__deepcopy__
734
735 memo: object
736 /
737
738[clinic start generated code]*/
739
740static PyObject *
741_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
742/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200744 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745 ElementObject* element;
746 PyObject* tag;
747 PyObject* attrib;
748 PyObject* text;
749 PyObject* tail;
750 PyObject* id;
751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752 tag = deepcopy(self->tag, memo);
753 if (!tag)
754 return NULL;
755
756 if (self->extra) {
757 attrib = deepcopy(self->extra->attrib, memo);
758 if (!attrib) {
759 Py_DECREF(tag);
760 return NULL;
761 }
762 } else {
763 Py_INCREF(Py_None);
764 attrib = Py_None;
765 }
766
Eli Bendersky092af1f2012-03-04 07:14:03 +0200767 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000768
769 Py_DECREF(tag);
770 Py_DECREF(attrib);
771
772 if (!element)
773 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100774
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000775 text = deepcopy(JOIN_OBJ(self->text), memo);
776 if (!text)
777 goto error;
778 Py_DECREF(element->text);
779 element->text = JOIN_SET(text, JOIN_GET(self->text));
780
781 tail = deepcopy(JOIN_OBJ(self->tail), memo);
782 if (!tail)
783 goto error;
784 Py_DECREF(element->tail);
785 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
786
787 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000788 if (element_resize(element, self->extra->length) < 0)
789 goto error;
790
791 for (i = 0; i < self->extra->length; i++) {
792 PyObject* child = deepcopy(self->extra->children[i], memo);
793 if (!child) {
794 element->extra->length = i;
795 goto error;
796 }
797 element->extra->children[i] = child;
798 }
799
800 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 }
802
803 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700804 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000805 if (!id)
806 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 i = PyDict_SetItem(memo, id, (PyObject*) element);
809
810 Py_DECREF(id);
811
812 if (i < 0)
813 goto error;
814
815 return (PyObject*) element;
816
817 error:
818 Py_DECREF(element);
819 return NULL;
820}
821
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200822LOCAL(PyObject *)
823deepcopy(PyObject *object, PyObject *memo)
824{
825 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200826 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200827 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200828
829 /* Fast paths */
830 if (object == Py_None || PyUnicode_CheckExact(object)) {
831 Py_INCREF(object);
832 return object;
833 }
834
835 if (Py_REFCNT(object) == 1) {
836 if (PyDict_CheckExact(object)) {
837 PyObject *key, *value;
838 Py_ssize_t pos = 0;
839 int simple = 1;
840 while (PyDict_Next(object, &pos, &key, &value)) {
841 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
842 simple = 0;
843 break;
844 }
845 }
846 if (simple)
847 return PyDict_Copy(object);
848 /* Fall through to general case */
849 }
850 else if (Element_CheckExact(object)) {
851 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
852 }
853 }
854
855 /* General case */
856 st = ET_STATE_GLOBAL;
857 if (!st->deepcopy_obj) {
858 PyErr_SetString(PyExc_RuntimeError,
859 "deepcopy helper not found");
860 return NULL;
861 }
862
Victor Stinner7fbac452016-08-20 01:34:44 +0200863 stack[0] = object;
864 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200865 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200866}
867
868
Serhiy Storchakacb985562015-05-04 15:32:48 +0300869/*[clinic input]
870_elementtree.Element.__sizeof__ -> Py_ssize_t
871
872[clinic start generated code]*/
873
874static Py_ssize_t
875_elementtree_Element___sizeof___impl(ElementObject *self)
876/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200878 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879 if (self->extra) {
880 result += sizeof(ElementObjectExtra);
881 if (self->extra->children != self->extra->_children)
882 result += sizeof(PyObject*) * self->extra->allocated;
883 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200885}
886
/* Keys of the state dict built by __getstate__ and parsed by __setstate__. */
#define PICKLED_TAG      "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TAIL     "tail"
#define PICKLED_TEXT     "text"
893
894/* __getstate__ returns a fabricated instance dict as in the pure-Python
895 * Element implementation, for interoperability/interchangeability. This
896 * makes the pure-Python implementation details an API, but (a) there aren't
897 * any unnecessary structures there; and (b) it buys compatibility with 3.2
898 * pickles. See issue #16076.
899 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300900/*[clinic input]
901_elementtree.Element.__getstate__
902
903[clinic start generated code]*/
904
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906_elementtree_Element___getstate___impl(ElementObject *self)
907/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200909 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910 PyObject *instancedict = NULL, *children;
911
912 /* Build a list of children. */
913 children = PyList_New(self->extra ? self->extra->length : 0);
914 if (!children)
915 return NULL;
916 for (i = 0; i < PyList_GET_SIZE(children); i++) {
917 PyObject *child = self->extra->children[i];
918 Py_INCREF(child);
919 PyList_SET_ITEM(children, i, child);
920 }
921
922 /* Construct the state object. */
923 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
924 if (noattrib)
925 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
926 PICKLED_TAG, self->tag,
927 PICKLED_CHILDREN, children,
928 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700929 PICKLED_TEXT, JOIN_OBJ(self->text),
930 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931 else
932 instancedict = Py_BuildValue("{sOsOsOsOsO}",
933 PICKLED_TAG, self->tag,
934 PICKLED_CHILDREN, children,
935 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700936 PICKLED_TEXT, JOIN_OBJ(self->text),
937 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800938 if (instancedict) {
939 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800941 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 else {
943 for (i = 0; i < PyList_GET_SIZE(children); i++)
944 Py_DECREF(PyList_GET_ITEM(children, i));
945 Py_DECREF(children);
946
947 return NULL;
948 }
949}
950
951static PyObject *
952element_setstate_from_attributes(ElementObject *self,
953 PyObject *tag,
954 PyObject *attrib,
955 PyObject *text,
956 PyObject *tail,
957 PyObject *children)
958{
959 Py_ssize_t i, nchildren;
960
961 if (!tag) {
962 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
963 return NULL;
964 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200966 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300967 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->text);
970 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 _clear_joined_ptr(&self->tail);
974 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
975 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 /* Handle ATTRIB and CHILDREN. */
978 if (!children && !attrib)
979 Py_RETURN_NONE;
980
981 /* Compute 'nchildren'. */
982 if (children) {
983 if (!PyList_Check(children)) {
984 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
985 return NULL;
986 }
987 nchildren = PyList_Size(children);
988 }
989 else {
990 nchildren = 0;
991 }
992
993 /* Allocate 'extra'. */
994 if (element_resize(self, nchildren)) {
995 return NULL;
996 }
997 assert(self->extra && self->extra->allocated >= nchildren);
998
999 /* Copy children */
1000 for (i = 0; i < nchildren; i++) {
1001 self->extra->children[i] = PyList_GET_ITEM(children, i);
1002 Py_INCREF(self->extra->children[i]);
1003 }
1004
1005 self->extra->length = nchildren;
1006 self->extra->allocated = nchildren;
1007
1008 /* Stash attrib. */
1009 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001011 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 }
1013
1014 Py_RETURN_NONE;
1015}
1016
1017/* __setstate__ for Element instance from the Python implementation.
1018 * 'state' should be the instance dict.
1019 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021static PyObject *
1022element_setstate_from_Python(ElementObject *self, PyObject *state)
1023{
1024 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1025 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1026 PyObject *args;
1027 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030 tag = attrib = text = tail = children = NULL;
1031 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034
1035 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1036 &attrib, &text, &tail, &children))
1037 retval = element_setstate_from_attributes(self, tag, attrib, text,
1038 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001039 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001040 retval = NULL;
1041
1042 Py_DECREF(args);
1043 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044}
1045
Serhiy Storchakacb985562015-05-04 15:32:48 +03001046/*[clinic input]
1047_elementtree.Element.__setstate__
1048
1049 state: object
1050 /
1051
1052[clinic start generated code]*/
1053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1056/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057{
1058 if (!PyDict_CheckExact(state)) {
1059 PyErr_Format(PyExc_TypeError,
1060 "Don't know how to unpickle \"%.200R\" as an Element",
1061 state);
1062 return NULL;
1063 }
1064 else
1065 return element_setstate_from_Python(self, state);
1066}
1067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068LOCAL(int)
1069checkpath(PyObject* tag)
1070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 Py_ssize_t i;
1072 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 /* check if a tag contains an xpath character */
1075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076#define PATHCHAR(ch) \
1077 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1081 void *data = PyUnicode_DATA(tag);
1082 unsigned int kind = PyUnicode_KIND(tag);
1083 for (i = 0; i < len; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 return 1;
1091 }
1092 return 0;
1093 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001094 if (PyBytes_Check(tag)) {
1095 char *p = PyBytes_AS_STRING(tag);
1096 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (p[i] == '{')
1098 check = 0;
1099 else if (p[i] == '}')
1100 check = 1;
1101 else if (check && PATHCHAR(p[i]))
1102 return 1;
1103 }
1104 return 0;
1105 }
1106
1107 return 1; /* unknown type; might be path expression */
1108}
1109
Serhiy Storchakacb985562015-05-04 15:32:48 +03001110/*[clinic input]
1111_elementtree.Element.extend
1112
1113 elements: object
1114 /
1115
1116[clinic start generated code]*/
1117
1118static PyObject *
1119_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1120/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121{
1122 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 if (!seq) {
1127 PyErr_Format(
1128 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 );
1131 return NULL;
1132 }
1133
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 Py_INCREF(element);
1137 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001138 PyErr_Format(
1139 PyExc_TypeError,
1140 "expected an Element, not \"%.200s\"",
1141 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001142 Py_DECREF(seq);
1143 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001144 return NULL;
1145 }
1146
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147 if (element_add_subelement(self, element) < 0) {
1148 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001149 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 return NULL;
1151 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001152 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001153 }
1154
1155 Py_DECREF(seq);
1156
1157 Py_RETURN_NONE;
1158}
1159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160/*[clinic input]
1161_elementtree.Element.find
1162
1163 path: object
1164 namespaces: object = None
1165
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1170 PyObject *namespaces)
1171/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001173 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001175
Serhiy Storchakacb985562015-05-04 15:32:48 +03001176 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001178 return _PyObject_CallMethodIdObjArgs(
1179 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001181 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182
1183 if (!self->extra)
1184 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001188 int rc;
1189 if (!Element_CheckExact(item))
1190 continue;
1191 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001192 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_DECREF(item);
1196 if (rc < 0)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 }
1199
1200 Py_RETURN_NONE;
1201}
1202
Serhiy Storchakacb985562015-05-04 15:32:48 +03001203/*[clinic input]
1204_elementtree.Element.findtext
1205
1206 path: object
1207 default: object = None
1208 namespaces: object = None
1209
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1214 PyObject *default_value,
1215 PyObject *namespaces)
1216/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001218 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001219 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001220 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001223 return _PyObject_CallMethodIdObjArgs(
1224 st->elementpath_obj, &PyId_findtext,
1225 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 );
1227
1228 if (!self->extra) {
1229 Py_INCREF(default_value);
1230 return default_value;
1231 }
1232
1233 for (i = 0; i < self->extra->length; i++) {
1234 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001235 int rc;
1236 if (!Element_CheckExact(item))
1237 continue;
1238 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001239 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001241 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 if (text == Py_None) {
1243 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001244 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001246 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248 return text;
1249 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001250 Py_DECREF(item);
1251 if (rc < 0)
1252 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001253 }
1254
1255 Py_INCREF(default_value);
1256 return default_value;
1257}
1258
Serhiy Storchakacb985562015-05-04 15:32:48 +03001259/*[clinic input]
1260_elementtree.Element.findall
1261
1262 path: object
1263 namespaces: object = None
1264
1265[clinic start generated code]*/
1266
1267static PyObject *
1268_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1269 PyObject *namespaces)
1270/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001272 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001274 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001275 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001276
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001278 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001279 return _PyObject_CallMethodIdObjArgs(
1280 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001282 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001283
1284 out = PyList_New(0);
1285 if (!out)
1286 return NULL;
1287
1288 if (!self->extra)
1289 return out;
1290
1291 for (i = 0; i < self->extra->length; i++) {
1292 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001293 int rc;
1294 if (!Element_CheckExact(item))
1295 continue;
1296 Py_INCREF(item);
1297 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1298 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1299 Py_DECREF(item);
1300 Py_DECREF(out);
1301 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001303 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001304 }
1305
1306 return out;
1307}
1308
Serhiy Storchakacb985562015-05-04 15:32:48 +03001309/*[clinic input]
1310_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001311
Serhiy Storchakacb985562015-05-04 15:32:48 +03001312 path: object
1313 namespaces: object = None
1314
1315[clinic start generated code]*/
1316
1317static PyObject *
1318_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1319 PyObject *namespaces)
1320/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1321{
1322 PyObject* tag = path;
1323 _Py_IDENTIFIER(iterfind);
1324 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001325
Victor Stinnerf5616342016-12-09 15:26:00 +01001326 return _PyObject_CallMethodIdObjArgs(
1327 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328}
1329
Serhiy Storchakacb985562015-05-04 15:32:48 +03001330/*[clinic input]
1331_elementtree.Element.get
1332
1333 key: object
1334 default: object = None
1335
1336[clinic start generated code]*/
1337
1338static PyObject *
1339_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1340 PyObject *default_value)
1341/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001342{
1343 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344
1345 if (!self->extra || self->extra->attrib == Py_None)
1346 value = default_value;
1347 else {
1348 value = PyDict_GetItem(self->extra->attrib, key);
1349 if (!value)
1350 value = default_value;
1351 }
1352
1353 Py_INCREF(value);
1354 return value;
1355}
1356
Serhiy Storchakacb985562015-05-04 15:32:48 +03001357/*[clinic input]
1358_elementtree.Element.getchildren
1359
1360[clinic start generated code]*/
1361
1362static PyObject *
1363_elementtree_Element_getchildren_impl(ElementObject *self)
1364/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001366 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 PyObject* list;
1368
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001369 /* FIXME: report as deprecated? */
1370
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001371 if (!self->extra)
1372 return PyList_New(0);
1373
1374 list = PyList_New(self->extra->length);
1375 if (!list)
1376 return NULL;
1377
1378 for (i = 0; i < self->extra->length; i++) {
1379 PyObject* item = self->extra->children[i];
1380 Py_INCREF(item);
1381 PyList_SET_ITEM(list, i, item);
1382 }
1383
1384 return list;
1385}
1386
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001387
Eli Bendersky64d11e62012-06-15 07:42:50 +03001388static PyObject *
1389create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1390
1391
Serhiy Storchakacb985562015-05-04 15:32:48 +03001392/*[clinic input]
1393_elementtree.Element.iter
1394
1395 tag: object = None
1396
1397[clinic start generated code]*/
1398
Eli Bendersky64d11e62012-06-15 07:42:50 +03001399static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001400_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1401/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001402{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001403 if (PyUnicode_Check(tag)) {
1404 if (PyUnicode_READY(tag) < 0)
1405 return NULL;
1406 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1407 tag = Py_None;
1408 }
1409 else if (PyBytes_Check(tag)) {
1410 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1411 tag = Py_None;
1412 }
1413
Eli Bendersky64d11e62012-06-15 07:42:50 +03001414 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001415}
1416
1417
Serhiy Storchakacb985562015-05-04 15:32:48 +03001418/*[clinic input]
1419_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421[clinic start generated code]*/
1422
1423static PyObject *
1424_elementtree_Element_itertext_impl(ElementObject *self)
1425/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1426{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001427 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001428}
1429
Eli Bendersky64d11e62012-06-15 07:42:50 +03001430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001432element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001433{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001434 ElementObject* self = (ElementObject*) self_;
1435
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001436 if (!self->extra || index < 0 || index >= self->extra->length) {
1437 PyErr_SetString(
1438 PyExc_IndexError,
1439 "child index out of range"
1440 );
1441 return NULL;
1442 }
1443
1444 Py_INCREF(self->extra->children[index]);
1445 return self->extra->children[index];
1446}
1447
Serhiy Storchakacb985562015-05-04 15:32:48 +03001448/*[clinic input]
1449_elementtree.Element.insert
1450
1451 index: Py_ssize_t
1452 subelement: object(subclass_of='&Element_Type')
1453 /
1454
1455[clinic start generated code]*/
1456
1457static PyObject *
1458_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1459 PyObject *subelement)
1460/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001461{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001462 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001463
Victor Stinner5f0af232013-07-11 23:01:36 +02001464 if (!self->extra) {
1465 if (create_extra(self, NULL) < 0)
1466 return NULL;
1467 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001468
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001469 if (index < 0) {
1470 index += self->extra->length;
1471 if (index < 0)
1472 index = 0;
1473 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001474 if (index > self->extra->length)
1475 index = self->extra->length;
1476
1477 if (element_resize(self, 1) < 0)
1478 return NULL;
1479
1480 for (i = self->extra->length; i > index; i--)
1481 self->extra->children[i] = self->extra->children[i-1];
1482
Serhiy Storchakacb985562015-05-04 15:32:48 +03001483 Py_INCREF(subelement);
1484 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001485
1486 self->extra->length++;
1487
1488 Py_RETURN_NONE;
1489}
1490
Serhiy Storchakacb985562015-05-04 15:32:48 +03001491/*[clinic input]
1492_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493
Serhiy Storchakacb985562015-05-04 15:32:48 +03001494[clinic start generated code]*/
1495
1496static PyObject *
1497_elementtree_Element_items_impl(ElementObject *self)
1498/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1499{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001500 if (!self->extra || self->extra->attrib == Py_None)
1501 return PyList_New(0);
1502
1503 return PyDict_Items(self->extra->attrib);
1504}
1505
Serhiy Storchakacb985562015-05-04 15:32:48 +03001506/*[clinic input]
1507_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001508
Serhiy Storchakacb985562015-05-04 15:32:48 +03001509[clinic start generated code]*/
1510
1511static PyObject *
1512_elementtree_Element_keys_impl(ElementObject *self)
1513/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1514{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001515 if (!self->extra || self->extra->attrib == Py_None)
1516 return PyList_New(0);
1517
1518 return PyDict_Keys(self->extra->attrib);
1519}
1520
Martin v. Löwis18e16552006-02-15 17:27:45 +00001521static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522element_length(ElementObject* self)
1523{
1524 if (!self->extra)
1525 return 0;
1526
1527 return self->extra->length;
1528}
1529
Serhiy Storchakacb985562015-05-04 15:32:48 +03001530/*[clinic input]
1531_elementtree.Element.makeelement
1532
1533 tag: object
1534 attrib: object
1535 /
1536
1537[clinic start generated code]*/
1538
1539static PyObject *
1540_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1541 PyObject *attrib)
1542/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543{
1544 PyObject* elem;
1545
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546 attrib = PyDict_Copy(attrib);
1547 if (!attrib)
1548 return NULL;
1549
Eli Bendersky092af1f2012-03-04 07:14:03 +02001550 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001551
1552 Py_DECREF(attrib);
1553
1554 return elem;
1555}
1556
Serhiy Storchakacb985562015-05-04 15:32:48 +03001557/*[clinic input]
1558_elementtree.Element.remove
1559
1560 subelement: object(subclass_of='&Element_Type')
1561 /
1562
1563[clinic start generated code]*/
1564
1565static PyObject *
1566_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1567/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001569 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001570 int rc;
1571 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573 if (!self->extra) {
1574 /* element has no children, so raise exception */
1575 PyErr_SetString(
1576 PyExc_ValueError,
1577 "list.remove(x): x not in list"
1578 );
1579 return NULL;
1580 }
1581
1582 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001583 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001585 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001586 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001587 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001588 if (rc < 0)
1589 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 }
1591
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001592 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001593 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001594 PyErr_SetString(
1595 PyExc_ValueError,
1596 "list.remove(x): x not in list"
1597 );
1598 return NULL;
1599 }
1600
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001601 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602
1603 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 for (; i < self->extra->length; i++)
1605 self->extra->children[i] = self->extra->children[i+1];
1606
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001607 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001608 Py_RETURN_NONE;
1609}
1610
1611static PyObject*
1612element_repr(ElementObject* self)
1613{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001614 int status;
1615
1616 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001617 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001618
1619 status = Py_ReprEnter((PyObject *)self);
1620 if (status == 0) {
1621 PyObject *res;
1622 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1623 Py_ReprLeave((PyObject *)self);
1624 return res;
1625 }
1626 if (status > 0)
1627 PyErr_Format(PyExc_RuntimeError,
1628 "reentrant call inside %s.__repr__",
1629 Py_TYPE(self)->tp_name);
1630 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001631}
1632
Serhiy Storchakacb985562015-05-04 15:32:48 +03001633/*[clinic input]
1634_elementtree.Element.set
1635
1636 key: object
1637 value: object
1638 /
1639
1640[clinic start generated code]*/
1641
1642static PyObject *
1643_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1644 PyObject *value)
1645/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001646{
1647 PyObject* attrib;
1648
Victor Stinner5f0af232013-07-11 23:01:36 +02001649 if (!self->extra) {
1650 if (create_extra(self, NULL) < 0)
1651 return NULL;
1652 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001653
1654 attrib = element_get_attrib(self);
1655 if (!attrib)
1656 return NULL;
1657
1658 if (PyDict_SetItem(attrib, key, value) < 0)
1659 return NULL;
1660
1661 Py_RETURN_NONE;
1662}
1663
1664static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001665element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001667 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001668 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001669 PyObject* old;
1670
1671 if (!self->extra || index < 0 || index >= self->extra->length) {
1672 PyErr_SetString(
1673 PyExc_IndexError,
1674 "child assignment index out of range");
1675 return -1;
1676 }
1677
1678 old = self->extra->children[index];
1679
1680 if (item) {
1681 Py_INCREF(item);
1682 self->extra->children[index] = item;
1683 } else {
1684 self->extra->length--;
1685 for (i = index; i < self->extra->length; i++)
1686 self->extra->children[i] = self->extra->children[i+1];
1687 }
1688
1689 Py_DECREF(old);
1690
1691 return 0;
1692}
1693
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001694static PyObject*
1695element_subscr(PyObject* self_, PyObject* item)
1696{
1697 ElementObject* self = (ElementObject*) self_;
1698
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001699 if (PyIndex_Check(item)) {
1700 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001701
1702 if (i == -1 && PyErr_Occurred()) {
1703 return NULL;
1704 }
1705 if (i < 0 && self->extra)
1706 i += self->extra->length;
1707 return element_getitem(self_, i);
1708 }
1709 else if (PySlice_Check(item)) {
1710 Py_ssize_t start, stop, step, slicelen, cur, i;
1711 PyObject* list;
1712
1713 if (!self->extra)
1714 return PyList_New(0);
1715
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001716 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001717 self->extra->length,
1718 &start, &stop, &step, &slicelen) < 0) {
1719 return NULL;
1720 }
1721
1722 if (slicelen <= 0)
1723 return PyList_New(0);
1724 else {
1725 list = PyList_New(slicelen);
1726 if (!list)
1727 return NULL;
1728
1729 for (cur = start, i = 0; i < slicelen;
1730 cur += step, i++) {
1731 PyObject* item = self->extra->children[cur];
1732 Py_INCREF(item);
1733 PyList_SET_ITEM(list, i, item);
1734 }
1735
1736 return list;
1737 }
1738 }
1739 else {
1740 PyErr_SetString(PyExc_TypeError,
1741 "element indices must be integers");
1742 return NULL;
1743 }
1744}
1745
1746static int
1747element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1748{
1749 ElementObject* self = (ElementObject*) self_;
1750
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001751 if (PyIndex_Check(item)) {
1752 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001753
1754 if (i == -1 && PyErr_Occurred()) {
1755 return -1;
1756 }
1757 if (i < 0 && self->extra)
1758 i += self->extra->length;
1759 return element_setitem(self_, i, value);
1760 }
1761 else if (PySlice_Check(item)) {
1762 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1763
1764 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001765 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001766
Victor Stinner5f0af232013-07-11 23:01:36 +02001767 if (!self->extra) {
1768 if (create_extra(self, NULL) < 0)
1769 return -1;
1770 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001771
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001772 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001773 self->extra->length,
1774 &start, &stop, &step, &slicelen) < 0) {
1775 return -1;
1776 }
1777
Eli Bendersky865756a2012-03-09 13:38:15 +02001778 if (value == NULL) {
1779 /* Delete slice */
1780 size_t cur;
1781 Py_ssize_t i;
1782
1783 if (slicelen <= 0)
1784 return 0;
1785
1786 /* Since we're deleting, the direction of the range doesn't matter,
1787 * so for simplicity make it always ascending.
1788 */
1789 if (step < 0) {
1790 stop = start + 1;
1791 start = stop + step * (slicelen - 1) - 1;
1792 step = -step;
1793 }
1794
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001795 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001796
1797 /* recycle is a list that will contain all the children
1798 * scheduled for removal.
1799 */
1800 if (!(recycle = PyList_New(slicelen))) {
1801 PyErr_NoMemory();
1802 return -1;
1803 }
1804
1805 /* This loop walks over all the children that have to be deleted,
1806 * with cur pointing at them. num_moved is the amount of children
1807 * until the next deleted child that have to be "shifted down" to
1808 * occupy the deleted's places.
1809 * Note that in the ith iteration, shifting is done i+i places down
1810 * because i children were already removed.
1811 */
1812 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1813 /* Compute how many children have to be moved, clipping at the
1814 * list end.
1815 */
1816 Py_ssize_t num_moved = step - 1;
1817 if (cur + step >= (size_t)self->extra->length) {
1818 num_moved = self->extra->length - cur - 1;
1819 }
1820
1821 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1822
1823 memmove(
1824 self->extra->children + cur - i,
1825 self->extra->children + cur + 1,
1826 num_moved * sizeof(PyObject *));
1827 }
1828
1829 /* Leftover "tail" after the last removed child */
1830 cur = start + (size_t)slicelen * step;
1831 if (cur < (size_t)self->extra->length) {
1832 memmove(
1833 self->extra->children + cur - slicelen,
1834 self->extra->children + cur,
1835 (self->extra->length - cur) * sizeof(PyObject *));
1836 }
1837
1838 self->extra->length -= slicelen;
1839
1840 /* Discard the recycle list with all the deleted sub-elements */
1841 Py_XDECREF(recycle);
1842 return 0;
1843 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001844
1845 /* A new slice is actually being assigned */
1846 seq = PySequence_Fast(value, "");
1847 if (!seq) {
1848 PyErr_Format(
1849 PyExc_TypeError,
1850 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1851 );
1852 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001854 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855
1856 if (step != 1 && newlen != slicelen)
1857 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001858 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001860 "attempt to assign sequence of size %zd "
1861 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001862 newlen, slicelen
1863 );
1864 return -1;
1865 }
1866
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001867 /* Resize before creating the recycle bin, to prevent refleaks. */
1868 if (newlen > slicelen) {
1869 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001870 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001871 return -1;
1872 }
1873 }
1874
1875 if (slicelen > 0) {
1876 /* to avoid recursive calls to this method (via decref), move
1877 old items to the recycle bin here, and get rid of them when
1878 we're done modifying the element */
1879 recycle = PyList_New(slicelen);
1880 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001881 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001882 return -1;
1883 }
1884 for (cur = start, i = 0; i < slicelen;
1885 cur += step, i++)
1886 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1887 }
1888
1889 if (newlen < slicelen) {
1890 /* delete slice */
1891 for (i = stop; i < self->extra->length; i++)
1892 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1893 } else if (newlen > slicelen) {
1894 /* insert slice */
1895 for (i = self->extra->length-1; i >= stop; i--)
1896 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1897 }
1898
1899 /* replace the slice */
1900 for (cur = start, i = 0; i < newlen;
1901 cur += step, i++) {
1902 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1903 Py_INCREF(element);
1904 self->extra->children[cur] = element;
1905 }
1906
1907 self->extra->length += newlen - slicelen;
1908
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001909 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001910
1911 /* discard the recycle bin, and everything in it */
1912 Py_XDECREF(recycle);
1913
1914 return 0;
1915 }
1916 else {
1917 PyErr_SetString(PyExc_TypeError,
1918 "element indices must be integers");
1919 return -1;
1920 }
1921}
1922
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001923static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001924element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001925{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001926 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001927 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001928 return res;
1929}
1930
Serhiy Storchakadde08152015-11-25 15:28:13 +02001931static PyObject*
1932element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001933{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001934 PyObject *res = element_get_text(self);
1935 Py_XINCREF(res);
1936 return res;
1937}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001938
Serhiy Storchakadde08152015-11-25 15:28:13 +02001939static PyObject*
1940element_tail_getter(ElementObject *self, void *closure)
1941{
1942 PyObject *res = element_get_tail(self);
1943 Py_XINCREF(res);
1944 return res;
1945}
1946
1947static PyObject*
1948element_attrib_getter(ElementObject *self, void *closure)
1949{
1950 PyObject *res;
1951 if (!self->extra) {
1952 if (create_extra(self, NULL) < 0)
1953 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001954 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001955 res = element_get_attrib(self);
1956 Py_XINCREF(res);
1957 return res;
1958}
Victor Stinner4d463432013-07-11 23:05:03 +02001959
/* macro for setter validation: a NULL value means "delete the attribute",
   which is refused with AttributeError; the enclosing function then
   returns -1.  Wrapped in do { } while (0) so the expansion is a single
   statement and cannot capture a following `else`. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1968
Serhiy Storchakadde08152015-11-25 15:28:13 +02001969static int
1970element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1971{
1972 _VALIDATE_ATTR_VALUE(value);
1973 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001974 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001975 return 0;
1976}
1977
1978static int
1979element_text_setter(ElementObject *self, PyObject *value, void *closure)
1980{
1981 _VALIDATE_ATTR_VALUE(value);
1982 Py_INCREF(value);
1983 Py_DECREF(JOIN_OBJ(self->text));
1984 self->text = value;
1985 return 0;
1986}
1987
1988static int
1989element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1990{
1991 _VALIDATE_ATTR_VALUE(value);
1992 Py_INCREF(value);
1993 Py_DECREF(JOIN_OBJ(self->tail));
1994 self->tail = value;
1995 return 0;
1996}
1997
1998static int
1999element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2000{
2001 _VALIDATE_ATTR_VALUE(value);
2002 if (!self->extra) {
2003 if (create_extra(self, NULL) < 0)
2004 return -1;
2005 }
2006 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002007 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002008 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002009}
2010
/* Sequence slots for Element: length, integer indexing and integer
   item assignment/deletion.  The remaining slots are unused. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* was_sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* was_sq_ass_slice */
};
2020
Eli Bendersky64d11e62012-06-15 07:42:50 +03002021/******************************* Element iterator ****************************/
2022
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked */
    Py_ssize_t child_index;     /* index of the next child of `parent` to visit */
} ParentLocator;
2035
/* Iterator object used by elementiter_next(). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of stack entries (PyMem allocated) */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* starting element; set to NULL once visited */
    PyObject *sought_tag;           /* tag to match; Py_None matches every element */
    int gettext;                    /* non-zero: yield text/tail instead of elements */
} ElementIterObject;

2046
2047static void
2048elementiter_dealloc(ElementIterObject *it)
2049{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002050 Py_ssize_t i = it->parent_stack_used;
2051 it->parent_stack_used = 0;
2052 while (i--)
2053 Py_XDECREF(it->parent_stack[i].parent);
2054 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002055
2056 Py_XDECREF(it->sought_tag);
2057 Py_XDECREF(it->root_element);
2058
2059 PyObject_GC_UnTrack(it);
2060 PyObject_GC_Del(it);
2061}
2062
2063static int
2064elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2065{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002066 Py_ssize_t i = it->parent_stack_used;
2067 while (i--)
2068 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002069
2070 Py_VISIT(it->root_element);
2071 Py_VISIT(it->sought_tag);
2072 return 0;
2073}
2074
2075/* Helper function for elementiter_next. Add a new parent to the parent stack.
2076 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002077static int
2078parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002079{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002080 ParentLocator *item;
2081
2082 if (it->parent_stack_used >= it->parent_stack_size) {
2083 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2084 ParentLocator *parent_stack = it->parent_stack;
2085 PyMem_Resize(parent_stack, ParentLocator, new_size);
2086 if (parent_stack == NULL)
2087 return -1;
2088 it->parent_stack = parent_stack;
2089 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002091 item = it->parent_stack + it->parent_stack_used++;
2092 Py_INCREF(parent);
2093 item->parent = parent;
2094 item->child_index = 0;
2095 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002096}
2097
/* Produce the next item of the iteration, or NULL with an exception set
 * (StopIteration when the iterator is exhausted).
 *
 * When it->gettext is zero, elements are returned (filtered by
 * it->sought_tag unless that is Py_None); otherwise text/tail values that
 * test true are returned.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Popping hands the stack's reference to `elem` over to this
                 * function; it is released below or at the gettext label.
                 */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            /* Only Element instances can be descended into; the iterator
             * state is left unchanged when this check fails.
             */
            if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) {
                PyErr_Format(PyExc_AttributeError,
                             "'%.100s' object has no attribute 'iter'",
                             Py_TYPE(extra->children[child_index])->tp_name);
                return NULL;
            }
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* At this point this function owns one reference to `elem`; the push
         * takes a second one for the stack entry.  `item` must not be used
         * past this call because the stack array may have been reallocated.
         */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* iter(): Py_None as the sought tag matches every element. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* `text` is not owned here; it is incref'd before `elem` is released
         * so that it survives the decref.  Values that are None or test
         * false are skipped and the loop continues.
         */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2208
2209
/* Type object for the element iterator: a GC-enabled type whose only
   populated behaviour slots are dealloc, traverse, iter and iternext. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2253
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002254#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002255
2256static PyObject *
2257create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2258{
2259 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002260
2261 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2262 if (!it)
2263 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002264
Victor Stinner4d463432013-07-11 23:05:03 +02002265 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002266 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002267 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002268 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002269 it->root_element = self;
2270
Eli Bendersky64d11e62012-06-15 07:42:50 +03002271 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002272
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002273 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002274 if (it->parent_stack == NULL) {
2275 Py_DECREF(it);
2276 PyErr_NoMemory();
2277 return NULL;
2278 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002279 it->parent_stack_used = 0;
2280 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002281
Eli Bendersky64d11e62012-06-15 07:42:50 +03002282 return (PyObject *)it;
2283}
2284
2285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286/* ==================================================================== */
2287/* the tree builder type */
2288
/* State of a tree builder.  Object fields documented as "or NULL" may be
   unset. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* called as factory(tag, attrib); NULL or None
                                  selects the built-in element type */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2311
/* True when op's type is exactly TreeBuilder_Type (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002313
2314/* -------------------------------------------------------------------- */
2315/* constructor and destructor */
2316
Eli Bendersky58d548d2012-05-29 15:45:16 +03002317static PyObject *
2318treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002319{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002320 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2321 if (t != NULL) {
2322 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002323
Eli Bendersky58d548d2012-05-29 15:45:16 +03002324 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002325 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002326 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002327 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328
Eli Bendersky58d548d2012-05-29 15:45:16 +03002329 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002330 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002331 t->stack = PyList_New(20);
2332 if (!t->stack) {
2333 Py_DECREF(t->this);
2334 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002335 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002336 return NULL;
2337 }
2338 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002340 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002341 t->start_event_obj = t->end_event_obj = NULL;
2342 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2343 }
2344 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345}
2346
Serhiy Storchakacb985562015-05-04 15:32:48 +03002347/*[clinic input]
2348_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002349
Serhiy Storchakacb985562015-05-04 15:32:48 +03002350 element_factory: object = NULL
2351
2352[clinic start generated code]*/
2353
2354static int
2355_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2356 PyObject *element_factory)
2357/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2358{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002359 if (element_factory) {
2360 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002361 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002362 }
2363
Eli Bendersky58d548d2012-05-29 15:45:16 +03002364 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365}
2366
Eli Bendersky48d358b2012-05-30 17:57:50 +03002367static int
2368treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2369{
2370 Py_VISIT(self->root);
2371 Py_VISIT(self->this);
2372 Py_VISIT(self->last);
2373 Py_VISIT(self->data);
2374 Py_VISIT(self->stack);
2375 Py_VISIT(self->element_factory);
2376 return 0;
2377}
2378
/* Drop every object reference held by the tree builder and reset the
   fields to NULL.  Also called from treebuilder_dealloc().  Always
   returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}

Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395
Eli Bendersky48d358b2012-05-30 17:57:50 +03002396static void
2397treebuilder_dealloc(TreeBuilderObject *self)
2398{
2399 PyObject_GC_UnTrack(self);
2400 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002401 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002402}
2403
2404/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002405/* helpers for handling of arbitrary element-like objects */
2406
2407static int
Serhiy Storchaka576def02017-03-30 09:47:31 +03002408treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002409 PyObject **dest, _Py_Identifier *name)
2410{
2411 if (Element_CheckExact(element)) {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002412 PyObject *tmp = JOIN_OBJ(*dest);
2413 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2414 *data = NULL;
2415 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002416 return 0;
2417 }
2418 else {
Serhiy Storchaka576def02017-03-30 09:47:31 +03002419 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002420 int r;
2421 if (joined == NULL)
2422 return -1;
2423 r = _PyObject_SetAttrId(element, name, joined);
2424 Py_DECREF(joined);
Serhiy Storchaka576def02017-03-30 09:47:31 +03002425 if (r < 0)
2426 return -1;
2427 Py_CLEAR(*data);
2428 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002429 }
2430}
2431
Serhiy Storchaka576def02017-03-30 09:47:31 +03002432LOCAL(int)
2433treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002434{
Serhiy Storchaka576def02017-03-30 09:47:31 +03002435 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002436
Serhiy Storchaka576def02017-03-30 09:47:31 +03002437 if (!self->data) {
2438 return 0;
2439 }
2440
2441 if (self->this == element) {
2442 _Py_IDENTIFIER(text);
2443 return treebuilder_set_element_text_or_tail(
2444 element, &self->data,
2445 &((ElementObject *) element)->text, &PyId_text);
2446 }
2447 else {
2448 _Py_IDENTIFIER(tail);
2449 return treebuilder_set_element_text_or_tail(
2450 element, &self->data,
2451 &((ElementObject *) element)->tail, &PyId_tail);
2452 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002453}
2454
2455static int
2456treebuilder_add_subelement(PyObject *element, PyObject *child)
2457{
2458 _Py_IDENTIFIER(append);
2459 if (Element_CheckExact(element)) {
2460 ElementObject *elem = (ElementObject *) element;
2461 return element_add_subelement(elem, child);
2462 }
2463 else {
2464 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002465 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002466 if (res == NULL)
2467 return -1;
2468 Py_DECREF(res);
2469 return 0;
2470 }
2471}
2472
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002473LOCAL(int)
2474treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2475 PyObject *node)
2476{
2477 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002478 PyObject *res;
2479 PyObject *event = PyTuple_Pack(2, action, node);
2480 if (event == NULL)
2481 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002482 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002483 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002484 if (res == NULL)
2485 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002486 Py_DECREF(res);
2487 }
2488 return 0;
2489}
2490
Antoine Pitrouee329312012-10-04 19:53:29 +02002491/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492/* handlers */
2493
2494LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2496 PyObject* attrib)
2497{
2498 PyObject* node;
2499 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002500 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501
Serhiy Storchaka576def02017-03-30 09:47:31 +03002502 if (treebuilder_flush_data(self) < 0) {
2503 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504 }
2505
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002506 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002507 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002508 } else if (attrib == Py_None) {
2509 attrib = PyDict_New();
2510 if (!attrib)
2511 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002512 node = PyObject_CallFunctionObjArgs(self->element_factory,
2513 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002514 Py_DECREF(attrib);
2515 }
2516 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002517 node = PyObject_CallFunctionObjArgs(self->element_factory,
2518 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002519 }
2520 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002522 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523
Antoine Pitrouee329312012-10-04 19:53:29 +02002524 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525
2526 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002527 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002528 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 } else {
2530 if (self->root) {
2531 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002532 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 "multiple elements on top level"
2534 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002535 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 }
2537 Py_INCREF(node);
2538 self->root = node;
2539 }
2540
2541 if (self->index < PyList_GET_SIZE(self->stack)) {
2542 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002543 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 Py_INCREF(this);
2545 } else {
2546 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002547 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 }
2549 self->index++;
2550
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002552 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002554 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002556 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2557 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558
2559 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002560
2561 error:
2562 Py_DECREF(node);
2563 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564}
2565
2566LOCAL(PyObject*)
2567treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2568{
2569 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002570 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002571 /* ignore calls to data before the first call to start */
2572 Py_RETURN_NONE;
2573 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 /* store the first item as is */
2575 Py_INCREF(data); self->data = data;
2576 } else {
2577 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002578 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2579 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002580 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581 /* expat often generates single character data sections; handle
2582 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002583 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2584 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002585 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002586 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002587 } else if (PyList_CheckExact(self->data)) {
2588 if (PyList_Append(self->data, data) < 0)
2589 return NULL;
2590 } else {
2591 PyObject* list = PyList_New(2);
2592 if (!list)
2593 return NULL;
2594 PyList_SET_ITEM(list, 0, self->data);
2595 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2596 self->data = list;
2597 }
2598 }
2599
2600 Py_RETURN_NONE;
2601}
2602
2603LOCAL(PyObject*)
2604treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2605{
2606 PyObject* item;
2607
Serhiy Storchaka576def02017-03-30 09:47:31 +03002608 if (treebuilder_flush_data(self) < 0) {
2609 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610 }
2611
2612 if (self->index == 0) {
2613 PyErr_SetString(
2614 PyExc_IndexError,
2615 "pop from empty stack"
2616 );
2617 return NULL;
2618 }
2619
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002620 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002621 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002622 self->index--;
2623 self->this = PyList_GET_ITEM(self->stack, self->index);
2624 Py_INCREF(self->this);
2625 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002627 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2628 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629
2630 Py_INCREF(self->last);
2631 return (PyObject*) self->last;
2632}
2633
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634/* -------------------------------------------------------------------- */
2635/* methods (in alphabetical order) */
2636
Serhiy Storchakacb985562015-05-04 15:32:48 +03002637/*[clinic input]
2638_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002639
Serhiy Storchakacb985562015-05-04 15:32:48 +03002640 data: object
2641 /
2642
2643[clinic start generated code]*/
2644
2645static PyObject *
2646_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2647/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2648{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649 return treebuilder_handle_data(self, data);
2650}
2651
Serhiy Storchakacb985562015-05-04 15:32:48 +03002652/*[clinic input]
2653_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002654
Serhiy Storchakacb985562015-05-04 15:32:48 +03002655 tag: object
2656 /
2657
2658[clinic start generated code]*/
2659
2660static PyObject *
2661_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2662/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2663{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002664 return treebuilder_handle_end(self, tag);
2665}
2666
2667LOCAL(PyObject*)
2668treebuilder_done(TreeBuilderObject* self)
2669{
2670 PyObject* res;
2671
2672 /* FIXME: check stack size? */
2673
2674 if (self->root)
2675 res = self->root;
2676 else
2677 res = Py_None;
2678
2679 Py_INCREF(res);
2680 return res;
2681}
2682
Serhiy Storchakacb985562015-05-04 15:32:48 +03002683/*[clinic input]
2684_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685
Serhiy Storchakacb985562015-05-04 15:32:48 +03002686[clinic start generated code]*/
2687
2688static PyObject *
2689_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2690/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2691{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692 return treebuilder_done(self);
2693}
2694
Serhiy Storchakacb985562015-05-04 15:32:48 +03002695/*[clinic input]
2696_elementtree.TreeBuilder.start
2697
2698 tag: object
2699 attrs: object = None
2700 /
2701
2702[clinic start generated code]*/
2703
2704static PyObject *
2705_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2706 PyObject *attrs)
2707/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002709 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710}
2711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712/* ==================================================================== */
2713/* the expat interface */
2714
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002717
2718/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2719 * cached globally without being in per-module state.
2720 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002721static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002723
Eli Bendersky52467b12012-06-01 07:13:08 +03002724static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2725 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2726
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727typedef struct {
2728 PyObject_HEAD
2729
2730 XML_Parser parser;
2731
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002732 PyObject *target;
2733 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002735 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002737 PyObject *handle_start;
2738 PyObject *handle_data;
2739 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002741 PyObject *handle_comment;
2742 PyObject *handle_pi;
2743 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002745 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002746
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747} XMLParserObject;
2748
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002749static PyObject*
Victor Stinner0c4a8282017-01-17 02:21:47 +01002750_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames);
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002751static PyObject *
2752_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2753 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755/* helpers */
2756
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757LOCAL(PyObject*)
2758makeuniversal(XMLParserObject* self, const char* string)
2759{
2760 /* convert a UTF-8 tag/attribute name from the expat parser
2761 to a universal name string */
2762
Antoine Pitrouc1948842012-10-01 23:40:37 +02002763 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764 PyObject* key;
2765 PyObject* value;
2766
2767 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002768 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769 if (!key)
2770 return NULL;
2771
2772 value = PyDict_GetItem(self->names, key);
2773
2774 if (value) {
2775 Py_INCREF(value);
2776 } else {
2777 /* new name. convert to universal name, and decode as
2778 necessary */
2779
2780 PyObject* tag;
2781 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002782 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783
2784 /* look for namespace separator */
2785 for (i = 0; i < size; i++)
2786 if (string[i] == '}')
2787 break;
2788 if (i != size) {
2789 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002790 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002791 if (tag == NULL) {
2792 Py_DECREF(key);
2793 return NULL;
2794 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002795 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 p[0] = '{';
2797 memcpy(p+1, string, size);
2798 size++;
2799 } else {
2800 /* plain name; use key as tag */
2801 Py_INCREF(key);
2802 tag = key;
2803 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002804
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002806 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002807 value = PyUnicode_DecodeUTF8(p, size, "strict");
2808 Py_DECREF(tag);
2809 if (!value) {
2810 Py_DECREF(key);
2811 return NULL;
2812 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002813
2814 /* add to names dictionary */
2815 if (PyDict_SetItem(self->names, key, value) < 0) {
2816 Py_DECREF(key);
2817 Py_DECREF(value);
2818 return NULL;
2819 }
2820 }
2821
2822 Py_DECREF(key);
2823 return value;
2824}
2825
Eli Bendersky5b77d812012-03-16 08:20:05 +02002826/* Set the ParseError exception with the given parameters.
2827 * If message is not NULL, it's used as the error string. Otherwise, the
2828 * message string is the default for the given error_code.
2829*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002830static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002831expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2832 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002833{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002834 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002835 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002836
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002837 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002838 message ? message : EXPAT(ErrorString)(error_code),
2839 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002840 if (errmsg == NULL)
2841 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002842
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002843 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002844 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002845 if (!error)
2846 return;
2847
Eli Bendersky5b77d812012-03-16 08:20:05 +02002848 /* Add code and position attributes */
2849 code = PyLong_FromLong((long)error_code);
2850 if (!code) {
2851 Py_DECREF(error);
2852 return;
2853 }
2854 if (PyObject_SetAttrString(error, "code", code) == -1) {
2855 Py_DECREF(error);
2856 Py_DECREF(code);
2857 return;
2858 }
2859 Py_DECREF(code);
2860
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002861 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002862 if (!position) {
2863 Py_DECREF(error);
2864 return;
2865 }
2866 if (PyObject_SetAttrString(error, "position", position) == -1) {
2867 Py_DECREF(error);
2868 Py_DECREF(position);
2869 return;
2870 }
2871 Py_DECREF(position);
2872
Eli Bendersky532d03e2013-08-10 08:00:39 -07002873 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002874 Py_DECREF(error);
2875}
2876
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877/* -------------------------------------------------------------------- */
2878/* handlers */
2879
2880static void
2881expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2882 int data_len)
2883{
2884 PyObject* key;
2885 PyObject* value;
2886 PyObject* res;
2887
2888 if (data_len < 2 || data_in[0] != '&')
2889 return;
2890
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002891 if (PyErr_Occurred())
2892 return;
2893
Neal Norwitz0269b912007-08-08 06:56:02 +00002894 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002895 if (!key)
2896 return;
2897
2898 value = PyDict_GetItem(self->entity, key);
2899
2900 if (value) {
2901 if (TreeBuilder_CheckExact(self->target))
2902 res = treebuilder_handle_data(
2903 (TreeBuilderObject*) self->target, value
2904 );
2905 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002906 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907 else
2908 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002910 } else if (!PyErr_Occurred()) {
2911 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002912 char message[128] = "undefined entity ";
2913 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002914 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002915 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002916 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002917 EXPAT(GetErrorColumnNumber)(self->parser),
2918 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919 );
2920 }
2921
2922 Py_DECREF(key);
2923}
2924
2925static void
2926expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2927 const XML_Char **attrib_in)
2928{
2929 PyObject* res;
2930 PyObject* tag;
2931 PyObject* attrib;
2932 int ok;
2933
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002934 if (PyErr_Occurred())
2935 return;
2936
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937 /* tag name */
2938 tag = makeuniversal(self, tag_in);
2939 if (!tag)
2940 return; /* parser will look for errors */
2941
2942 /* attributes */
2943 if (attrib_in[0]) {
2944 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002945 if (!attrib) {
2946 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002948 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 while (attrib_in[0] && attrib_in[1]) {
2950 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002951 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952 if (!key || !value) {
2953 Py_XDECREF(value);
2954 Py_XDECREF(key);
2955 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002956 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002957 return;
2958 }
2959 ok = PyDict_SetItem(attrib, key, value);
2960 Py_DECREF(value);
2961 Py_DECREF(key);
2962 if (ok < 0) {
2963 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002964 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 return;
2966 }
2967 attrib_in += 2;
2968 }
2969 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002970 Py_INCREF(Py_None);
2971 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002972 }
2973
2974 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002975 /* shortcut */
2976 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2977 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002978 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002979 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002980 if (attrib == Py_None) {
2981 Py_DECREF(attrib);
2982 attrib = PyDict_New();
2983 if (!attrib) {
2984 Py_DECREF(tag);
2985 return;
2986 }
2987 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002988 res = PyObject_CallFunctionObjArgs(self->handle_start,
2989 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002990 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002991 res = NULL;
2992
2993 Py_DECREF(tag);
2994 Py_DECREF(attrib);
2995
2996 Py_XDECREF(res);
2997}
2998
2999static void
3000expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3001 int data_len)
3002{
3003 PyObject* data;
3004 PyObject* res;
3005
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003006 if (PyErr_Occurred())
3007 return;
3008
Neal Norwitz0269b912007-08-08 06:56:02 +00003009 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003010 if (!data)
3011 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012
3013 if (TreeBuilder_CheckExact(self->target))
3014 /* shortcut */
3015 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3016 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003017 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 else
3019 res = NULL;
3020
3021 Py_DECREF(data);
3022
3023 Py_XDECREF(res);
3024}
3025
3026static void
3027expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3028{
3029 PyObject* tag;
3030 PyObject* res = NULL;
3031
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003032 if (PyErr_Occurred())
3033 return;
3034
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003035 if (TreeBuilder_CheckExact(self->target))
3036 /* shortcut */
3037 /* the standard tree builder doesn't look at the end tag */
3038 res = treebuilder_handle_end(
3039 (TreeBuilderObject*) self->target, Py_None
3040 );
3041 else if (self->handle_end) {
3042 tag = makeuniversal(self, tag_in);
3043 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003044 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003045 Py_DECREF(tag);
3046 }
3047 }
3048
3049 Py_XDECREF(res);
3050}
3051
3052static void
3053expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3054 const XML_Char *uri)
3055{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003056 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3057 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003058
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003059 if (PyErr_Occurred())
3060 return;
3061
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003062 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003063 return;
3064
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003065 if (!uri)
3066 uri = "";
3067 if (!prefix)
3068 prefix = "";
3069
3070 parcel = Py_BuildValue("ss", prefix, uri);
3071 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003072 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003073 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3074 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003075}
3076
3077static void
3078expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3079{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003080 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3081
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003082 if (PyErr_Occurred())
3083 return;
3084
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003085 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003086 return;
3087
3088 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089}
3090
3091static void
3092expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3093{
3094 PyObject* comment;
3095 PyObject* res;
3096
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003097 if (PyErr_Occurred())
3098 return;
3099
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003101 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003102 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003103 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3104 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105 Py_XDECREF(res);
3106 Py_DECREF(comment);
3107 }
3108 }
3109}
3110
Eli Bendersky45839902013-01-13 05:14:47 -08003111static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003112expat_start_doctype_handler(XMLParserObject *self,
3113 const XML_Char *doctype_name,
3114 const XML_Char *sysid,
3115 const XML_Char *pubid,
3116 int has_internal_subset)
3117{
3118 PyObject *self_pyobj = (PyObject *)self;
3119 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3120 PyObject *parser_doctype = NULL;
3121 PyObject *res = NULL;
3122
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003123 if (PyErr_Occurred())
3124 return;
3125
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003126 doctype_name_obj = makeuniversal(self, doctype_name);
3127 if (!doctype_name_obj)
3128 return;
3129
3130 if (sysid) {
3131 sysid_obj = makeuniversal(self, sysid);
3132 if (!sysid_obj) {
3133 Py_DECREF(doctype_name_obj);
3134 return;
3135 }
3136 } else {
3137 Py_INCREF(Py_None);
3138 sysid_obj = Py_None;
3139 }
3140
3141 if (pubid) {
3142 pubid_obj = makeuniversal(self, pubid);
3143 if (!pubid_obj) {
3144 Py_DECREF(doctype_name_obj);
3145 Py_DECREF(sysid_obj);
3146 return;
3147 }
3148 } else {
3149 Py_INCREF(Py_None);
3150 pubid_obj = Py_None;
3151 }
3152
3153 /* If the target has a handler for doctype, call it. */
3154 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003155 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3156 doctype_name_obj, pubid_obj,
3157 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003158 Py_CLEAR(res);
3159 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003160 else {
3161 /* Now see if the parser itself has a doctype method. If yes and it's
3162 * a custom method, call it but warn about deprecation. If it's only
3163 * the vanilla XMLParser method, do nothing.
3164 */
3165 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3166 if (parser_doctype &&
3167 !(PyCFunction_Check(parser_doctype) &&
3168 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3169 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003170 (PyCFunction) _elementtree_XMLParser_doctype)) {
3171 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3172 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003173 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003174 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003175 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003176 res = PyObject_CallFunctionObjArgs(parser_doctype,
3177 doctype_name_obj, pubid_obj,
3178 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003179 Py_CLEAR(res);
3180 }
3181 }
3182
3183clear:
3184 Py_XDECREF(parser_doctype);
3185 Py_DECREF(doctype_name_obj);
3186 Py_DECREF(pubid_obj);
3187 Py_DECREF(sysid_obj);
3188}
3189
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190static void
3191expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3192 const XML_Char* data_in)
3193{
3194 PyObject* target;
3195 PyObject* data;
3196 PyObject* res;
3197
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003198 if (PyErr_Occurred())
3199 return;
3200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003202 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3203 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003205 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3206 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207 Py_XDECREF(res);
3208 Py_DECREF(data);
3209 Py_DECREF(target);
3210 } else {
3211 Py_XDECREF(data);
3212 Py_XDECREF(target);
3213 }
3214 }
3215}
3216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218
Eli Bendersky52467b12012-06-01 07:13:08 +03003219static PyObject *
3220xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221{
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3223 if (self) {
3224 self->parser = NULL;
3225 self->target = self->entity = self->names = NULL;
3226 self->handle_start = self->handle_data = self->handle_end = NULL;
3227 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003228 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003230 return (PyObject *)self;
3231}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232
Serhiy Storchakacb985562015-05-04 15:32:48 +03003233/*[clinic input]
3234_elementtree.XMLParser.__init__
3235
3236 html: object = NULL
3237 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003238 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003239
3240[clinic start generated code]*/
3241
Eli Bendersky52467b12012-06-01 07:13:08 +03003242static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003243_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3244 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003245/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003246{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003247 self->entity = PyDict_New();
3248 if (!self->entity)
3249 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250
Serhiy Storchakacb985562015-05-04 15:32:48 +03003251 self->names = PyDict_New();
3252 if (!self->names) {
3253 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003254 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003256
Serhiy Storchakacb985562015-05-04 15:32:48 +03003257 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3258 if (!self->parser) {
3259 Py_CLEAR(self->entity);
3260 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003262 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 }
3264
Eli Bendersky52467b12012-06-01 07:13:08 +03003265 if (target) {
3266 Py_INCREF(target);
3267 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003268 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270 Py_CLEAR(self->entity);
3271 Py_CLEAR(self->names);
3272 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003273 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003276 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277
Serhiy Storchakacb985562015-05-04 15:32:48 +03003278 self->handle_start = PyObject_GetAttrString(target, "start");
3279 self->handle_data = PyObject_GetAttrString(target, "data");
3280 self->handle_end = PyObject_GetAttrString(target, "end");
3281 self->handle_comment = PyObject_GetAttrString(target, "comment");
3282 self->handle_pi = PyObject_GetAttrString(target, "pi");
3283 self->handle_close = PyObject_GetAttrString(target, "close");
3284 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285
3286 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003289 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 (XML_StartElementHandler) expat_start_handler,
3293 (XML_EndElementHandler) expat_end_handler
3294 );
3295 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 (XML_DefaultHandler) expat_default_handler
3298 );
3299 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003300 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 (XML_CharacterDataHandler) expat_data_handler
3302 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003303 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003305 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 (XML_CommentHandler) expat_comment_handler
3307 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003308 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003310 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 (XML_ProcessingInstructionHandler) expat_pi_handler
3312 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003313 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003314 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003315 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3316 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003319 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321
Eli Bendersky52467b12012-06-01 07:13:08 +03003322 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323}
3324
Eli Bendersky52467b12012-06-01 07:13:08 +03003325static int
3326xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3327{
3328 Py_VISIT(self->handle_close);
3329 Py_VISIT(self->handle_pi);
3330 Py_VISIT(self->handle_comment);
3331 Py_VISIT(self->handle_end);
3332 Py_VISIT(self->handle_data);
3333 Py_VISIT(self->handle_start);
3334
3335 Py_VISIT(self->target);
3336 Py_VISIT(self->entity);
3337 Py_VISIT(self->names);
3338
3339 return 0;
3340}
3341
3342static int
3343xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344{
3345 EXPAT(ParserFree)(self->parser);
3346
Antoine Pitrouc1948842012-10-01 23:40:37 +02003347 Py_CLEAR(self->handle_close);
3348 Py_CLEAR(self->handle_pi);
3349 Py_CLEAR(self->handle_comment);
3350 Py_CLEAR(self->handle_end);
3351 Py_CLEAR(self->handle_data);
3352 Py_CLEAR(self->handle_start);
3353 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354
Antoine Pitrouc1948842012-10-01 23:40:37 +02003355 Py_CLEAR(self->target);
3356 Py_CLEAR(self->entity);
3357 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358
Eli Bendersky52467b12012-06-01 07:13:08 +03003359 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360}
3361
Eli Bendersky52467b12012-06-01 07:13:08 +03003362static void
3363xmlparser_dealloc(XMLParserObject* self)
3364{
3365 PyObject_GC_UnTrack(self);
3366 xmlparser_gc_clear(self);
3367 Py_TYPE(self)->tp_free((PyObject *)self);
3368}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369
3370LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003371expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372{
3373 int ok;
3374
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003375 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003376 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3377
3378 if (PyErr_Occurred())
3379 return NULL;
3380
3381 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003382 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003383 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003385 EXPAT(GetErrorColumnNumber)(self->parser),
3386 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387 );
3388 return NULL;
3389 }
3390
3391 Py_RETURN_NONE;
3392}
3393
Serhiy Storchakacb985562015-05-04 15:32:48 +03003394/*[clinic input]
3395_elementtree.XMLParser.close
3396
3397[clinic start generated code]*/
3398
3399static PyObject *
3400_elementtree_XMLParser_close_impl(XMLParserObject *self)
3401/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402{
3403 /* end feeding data to parser */
3404
3405 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003406 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003407 if (!res)
3408 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003410 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 Py_DECREF(res);
3412 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003413 }
3414 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003415 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003416 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003417 }
3418 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003419 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003420 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421}
3422
Serhiy Storchakacb985562015-05-04 15:32:48 +03003423/*[clinic input]
3424_elementtree.XMLParser.feed
3425
3426 data: object
3427 /
3428
3429[clinic start generated code]*/
3430
3431static PyObject *
3432_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3433/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434{
3435 /* feed data to parser */
3436
Serhiy Storchakacb985562015-05-04 15:32:48 +03003437 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003438 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003439 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3440 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003441 return NULL;
3442 if (data_len > INT_MAX) {
3443 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3444 return NULL;
3445 }
3446 /* Explicitly set UTF-8 encoding. Return code ignored. */
3447 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003448 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003449 }
3450 else {
3451 Py_buffer view;
3452 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003453 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003454 return NULL;
3455 if (view.len > INT_MAX) {
3456 PyBuffer_Release(&view);
3457 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3458 return NULL;
3459 }
3460 res = expat_parse(self, view.buf, (int)view.len, 0);
3461 PyBuffer_Release(&view);
3462 return res;
3463 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464}
3465
Serhiy Storchakacb985562015-05-04 15:32:48 +03003466/*[clinic input]
3467_elementtree.XMLParser._parse_whole
3468
3469 file: object
3470 /
3471
3472[clinic start generated code]*/
3473
3474static PyObject *
3475_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3476/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003477{
Eli Benderskya3699232013-05-19 18:47:23 -07003478 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 PyObject* reader;
3480 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003481 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 PyObject* res;
3483
Serhiy Storchakacb985562015-05-04 15:32:48 +03003484 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003485 if (!reader)
3486 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003487
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488 /* read from open file object */
3489 for (;;) {
3490
3491 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3492
3493 if (!buffer) {
3494 /* read failed (e.g. due to KeyboardInterrupt) */
3495 Py_DECREF(reader);
3496 return NULL;
3497 }
3498
Eli Benderskyf996e772012-03-16 05:53:30 +02003499 if (PyUnicode_CheckExact(buffer)) {
3500 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003501 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003502 Py_DECREF(buffer);
3503 break;
3504 }
3505 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003506 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003507 if (!temp) {
3508 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003509 Py_DECREF(reader);
3510 return NULL;
3511 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003512 buffer = temp;
3513 }
3514 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515 Py_DECREF(buffer);
3516 break;
3517 }
3518
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003519 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3520 Py_DECREF(buffer);
3521 Py_DECREF(reader);
3522 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3523 return NULL;
3524 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003525 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003526 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527 );
3528
3529 Py_DECREF(buffer);
3530
3531 if (!res) {
3532 Py_DECREF(reader);
3533 return NULL;
3534 }
3535 Py_DECREF(res);
3536
3537 }
3538
3539 Py_DECREF(reader);
3540
3541 res = expat_parse(self, "", 0, 1);
3542
3543 if (res && TreeBuilder_CheckExact(self->target)) {
3544 Py_DECREF(res);
3545 return treebuilder_done((TreeBuilderObject*) self->target);
3546 }
3547
3548 return res;
3549}
3550
Serhiy Storchakacb985562015-05-04 15:32:48 +03003551/*[clinic input]
3552_elementtree.XMLParser.doctype
3553
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003554 name: object
3555 pubid: object
3556 system: object
3557 /
3558
Serhiy Storchakacb985562015-05-04 15:32:48 +03003559[clinic start generated code]*/
3560
3561static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003562_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3563 PyObject *pubid, PyObject *system)
3564/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003565{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003566 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3567 "This method of XMLParser is deprecated. Define"
3568 " doctype() method on the TreeBuilder target.",
3569 1) < 0) {
3570 return NULL;
3571 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003572 Py_RETURN_NONE;
3573}
3574
Serhiy Storchakacb985562015-05-04 15:32:48 +03003575/*[clinic input]
3576_elementtree.XMLParser._setevents
3577
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003578 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003579 events_to_report: object = None
3580 /
3581
3582[clinic start generated code]*/
3583
3584static PyObject *
3585_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3586 PyObject *events_queue,
3587 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003588/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589{
3590 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003591 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003592 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003593 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594
3595 if (!TreeBuilder_CheckExact(self->target)) {
3596 PyErr_SetString(
3597 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003598 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599 "targets"
3600 );
3601 return NULL;
3602 }
3603
3604 target = (TreeBuilderObject*) self->target;
3605
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003606 events_append = PyObject_GetAttrString(events_queue, "append");
3607 if (events_append == NULL)
3608 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003609 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610
3611 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003612 Py_CLEAR(target->start_event_obj);
3613 Py_CLEAR(target->end_event_obj);
3614 Py_CLEAR(target->start_ns_event_obj);
3615 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003617 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003618 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003619 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003620 Py_RETURN_NONE;
3621 }
3622
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003623 if (!(events_seq = PySequence_Fast(events_to_report,
3624 "events must be a sequence"))) {
3625 return NULL;
3626 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003627
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003628 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003629 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003630 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003631 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003632 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003633 } else if (PyBytes_Check(event_name_obj)) {
3634 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003635 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003636 if (event_name == NULL) {
3637 Py_DECREF(events_seq);
3638 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3639 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003640 }
3641
3642 Py_INCREF(event_name_obj);
3643 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003644 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003645 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003646 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003647 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003648 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003649 EXPAT(SetNamespaceDeclHandler)(
3650 self->parser,
3651 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3652 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3653 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003654 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003655 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003656 EXPAT(SetNamespaceDeclHandler)(
3657 self->parser,
3658 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3659 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3660 );
3661 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003662 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663 Py_DECREF(events_seq);
3664 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665 return NULL;
3666 }
3667 }
3668
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003669 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003670 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671}
3672
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003673static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003674xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003676 if (PyUnicode_Check(nameobj)) {
3677 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003678 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003679 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003680 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003681 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003682 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003683 return PyUnicode_FromFormat(
3684 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003686 }
3687 else
3688 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003689
Alexander Belopolskye239d232010-12-08 23:31:48 +00003690 Py_INCREF(res);
3691 return res;
3692 }
3693 generic:
3694 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695}
3696
Serhiy Storchakacb985562015-05-04 15:32:48 +03003697#include "clinic/_elementtree.c.h"
3698
3699static PyMethodDef element_methods[] = {
3700
3701 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3702
3703 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3704 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3705
3706 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3707 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3708 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3709
3710 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3711 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3712 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3713 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3714
3715 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3716 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3717 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3718
Victor Stinner84d8baa2016-09-29 22:12:35 +02003719 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_FASTCALL, _elementtree_Element_iter__doc__},
Serhiy Storchakacb985562015-05-04 15:32:48 +03003720 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3721
3722 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3723 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3724
3725 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3726
3727 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3728 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3729 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3730 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3731 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3732
3733 {NULL, NULL}
3734};
3735
3736static PyMappingMethods element_as_mapping = {
3737 (lenfunc) element_length,
3738 (binaryfunc) element_subscr,
3739 (objobjargproc) element_ass_subscr,
3740};
3741
Serhiy Storchakadde08152015-11-25 15:28:13 +02003742static PyGetSetDef element_getsetlist[] = {
3743 {"tag",
3744 (getter)element_tag_getter,
3745 (setter)element_tag_setter,
3746 "A string identifying what kind of data this element represents"},
3747 {"text",
3748 (getter)element_text_getter,
3749 (setter)element_text_setter,
3750 "A string of text directly after the start tag, or None"},
3751 {"tail",
3752 (getter)element_tail_getter,
3753 (setter)element_tail_setter,
3754 "A string of text directly after the end tag, or None"},
3755 {"attrib",
3756 (getter)element_attrib_getter,
3757 (setter)element_attrib_setter,
3758 "A dictionary containing the element's attributes"},
3759 {NULL},
3760};
3761
Serhiy Storchakacb985562015-05-04 15:32:48 +03003762static PyTypeObject Element_Type = {
3763 PyVarObject_HEAD_INIT(NULL, 0)
3764 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3765 /* methods */
3766 (destructor)element_dealloc, /* tp_dealloc */
3767 0, /* tp_print */
3768 0, /* tp_getattr */
3769 0, /* tp_setattr */
3770 0, /* tp_reserved */
3771 (reprfunc)element_repr, /* tp_repr */
3772 0, /* tp_as_number */
3773 &element_as_sequence, /* tp_as_sequence */
3774 &element_as_mapping, /* tp_as_mapping */
3775 0, /* tp_hash */
3776 0, /* tp_call */
3777 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003778 PyObject_GenericGetAttr, /* tp_getattro */
3779 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003780 0, /* tp_as_buffer */
3781 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3782 /* tp_flags */
3783 0, /* tp_doc */
3784 (traverseproc)element_gc_traverse, /* tp_traverse */
3785 (inquiry)element_gc_clear, /* tp_clear */
3786 0, /* tp_richcompare */
3787 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3788 0, /* tp_iter */
3789 0, /* tp_iternext */
3790 element_methods, /* tp_methods */
3791 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003792 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003793 0, /* tp_base */
3794 0, /* tp_dict */
3795 0, /* tp_descr_get */
3796 0, /* tp_descr_set */
3797 0, /* tp_dictoffset */
3798 (initproc)element_init, /* tp_init */
3799 PyType_GenericAlloc, /* tp_alloc */
3800 element_new, /* tp_new */
3801 0, /* tp_free */
3802};
3803
3804static PyMethodDef treebuilder_methods[] = {
3805 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3806 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3807 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3808 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3809 {NULL, NULL}
3810};
3811
3812static PyTypeObject TreeBuilder_Type = {
3813 PyVarObject_HEAD_INIT(NULL, 0)
3814 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3815 /* methods */
3816 (destructor)treebuilder_dealloc, /* tp_dealloc */
3817 0, /* tp_print */
3818 0, /* tp_getattr */
3819 0, /* tp_setattr */
3820 0, /* tp_reserved */
3821 0, /* tp_repr */
3822 0, /* tp_as_number */
3823 0, /* tp_as_sequence */
3824 0, /* tp_as_mapping */
3825 0, /* tp_hash */
3826 0, /* tp_call */
3827 0, /* tp_str */
3828 0, /* tp_getattro */
3829 0, /* tp_setattro */
3830 0, /* tp_as_buffer */
3831 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3832 /* tp_flags */
3833 0, /* tp_doc */
3834 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3835 (inquiry)treebuilder_gc_clear, /* tp_clear */
3836 0, /* tp_richcompare */
3837 0, /* tp_weaklistoffset */
3838 0, /* tp_iter */
3839 0, /* tp_iternext */
3840 treebuilder_methods, /* tp_methods */
3841 0, /* tp_members */
3842 0, /* tp_getset */
3843 0, /* tp_base */
3844 0, /* tp_dict */
3845 0, /* tp_descr_get */
3846 0, /* tp_descr_set */
3847 0, /* tp_dictoffset */
3848 _elementtree_TreeBuilder___init__, /* tp_init */
3849 PyType_GenericAlloc, /* tp_alloc */
3850 treebuilder_new, /* tp_new */
3851 0, /* tp_free */
3852};
3853
3854static PyMethodDef xmlparser_methods[] = {
3855 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3856 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3857 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3858 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3859 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3860 {NULL, NULL}
3861};
3862
Neal Norwitz227b5332006-03-22 09:28:35 +00003863static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003864 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003865 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003866 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003867 (destructor)xmlparser_dealloc, /* tp_dealloc */
3868 0, /* tp_print */
3869 0, /* tp_getattr */
3870 0, /* tp_setattr */
3871 0, /* tp_reserved */
3872 0, /* tp_repr */
3873 0, /* tp_as_number */
3874 0, /* tp_as_sequence */
3875 0, /* tp_as_mapping */
3876 0, /* tp_hash */
3877 0, /* tp_call */
3878 0, /* tp_str */
3879 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3880 0, /* tp_setattro */
3881 0, /* tp_as_buffer */
3882 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3883 /* tp_flags */
3884 0, /* tp_doc */
3885 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3886 (inquiry)xmlparser_gc_clear, /* tp_clear */
3887 0, /* tp_richcompare */
3888 0, /* tp_weaklistoffset */
3889 0, /* tp_iter */
3890 0, /* tp_iternext */
3891 xmlparser_methods, /* tp_methods */
3892 0, /* tp_members */
3893 0, /* tp_getset */
3894 0, /* tp_base */
3895 0, /* tp_dict */
3896 0, /* tp_descr_get */
3897 0, /* tp_descr_set */
3898 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003899 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003900 PyType_GenericAlloc, /* tp_alloc */
3901 xmlparser_new, /* tp_new */
3902 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003903};
3904
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003905/* ==================================================================== */
3906/* python module interface */
3907
3908static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003909 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003910 {NULL, NULL}
3911};
3912
Martin v. Löwis1a214512008-06-11 05:26:20 +00003913
Eli Bendersky532d03e2013-08-10 08:00:39 -07003914static struct PyModuleDef elementtreemodule = {
3915 PyModuleDef_HEAD_INIT,
3916 "_elementtree",
3917 NULL,
3918 sizeof(elementtreestate),
3919 _functions,
3920 NULL,
3921 elementtree_traverse,
3922 elementtree_clear,
3923 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003924};
3925
Neal Norwitzf6657e62006-12-28 04:47:50 +00003926PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003927PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003928{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003929 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003930 elementtreestate *st;
3931
3932 m = PyState_FindModule(&elementtreemodule);
3933 if (m) {
3934 Py_INCREF(m);
3935 return m;
3936 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003937
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003938 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003939 if (PyType_Ready(&ElementIter_Type) < 0)
3940 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003941 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003942 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003943 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003944 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003945 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003946 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003947
Eli Bendersky532d03e2013-08-10 08:00:39 -07003948 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003949 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003950 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003951 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003952
Eli Bendersky828efde2012-04-05 05:40:58 +03003953 if (!(temp = PyImport_ImportModule("copy")))
3954 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003955 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003956 Py_XDECREF(temp);
3957
Eli Bendersky532d03e2013-08-10 08:00:39 -07003958 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003959 return NULL;
3960
Eli Bendersky20d41742012-06-01 09:48:37 +03003961 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003962 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3963 if (expat_capi) {
3964 /* check that it's usable */
3965 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003966 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003967 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3968 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003969 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003970 PyErr_SetString(PyExc_ImportError,
3971 "pyexpat version is incompatible");
3972 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003973 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003974 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003975 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003976 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003977
Eli Bendersky532d03e2013-08-10 08:00:39 -07003978 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003979 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003980 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003981 Py_INCREF(st->parseerror_obj);
3982 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003983
Eli Bendersky092af1f2012-03-04 07:14:03 +02003984 Py_INCREF((PyObject *)&Element_Type);
3985 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3986
Eli Bendersky58d548d2012-05-29 15:45:16 +03003987 Py_INCREF((PyObject *)&TreeBuilder_Type);
3988 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3989
Eli Bendersky52467b12012-06-01 07:13:08 +03003990 Py_INCREF((PyObject *)&XMLParser_Type);
3991 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003992
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003993 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003994}