blob: 9e6f63b91ccd26a69db030f9cfa1c7943b8baa00 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra block;
   an element with at most this many children needs no separately
   allocated child array. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  The instrumented variants are compiled out
   (#if 0); in the active branch both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   `type`; under MSVC it additionally requests __inline and __fastcall. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* The 'join' flag lives in the lowest bit of the text/tail object
   pointers.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_GET(p)       -> the flag bit of p (0 or 1)
   JOIN_SET(p, flag) -> p with its flag bit cleared, then or'ed with flag
   JOIN_OBJ(p)       -> p with the flag bit cleared, as a PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
/* Fetch the per-module state of the given module object, which is assumed
 * to be the _elementtree module.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Fetch the state of the module instance imported in the currently running
 * sub-interpreter; the instance is located with PyState_FindModule().
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200153 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True when `op` is an instance of exactly Element_Type (subclasses do
   not match). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300400 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
511 PyObject* res = self->text;
512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
516 res = list_join(res);
517 if (!res)
518 return NULL;
519 self->text = res;
520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
531 PyObject* res = self->tail;
532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
536 res = list_join(res);
537 if (!res)
538 return NULL;
539 self->tail = res;
540 }
541 }
542
543 return res;
544}
545
546static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300547subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548{
549 PyObject* elem;
550
551 ElementObject* parent;
552 PyObject* tag;
553 PyObject* attrib = NULL;
554 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
555 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800556 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (attrib) {
561 /* attrib passed as positional arg */
562 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 if (!attrib)
564 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300565 if (kwds) {
566 if (PyDict_Update(attrib, kwds) < 0) {
567 return NULL;
568 }
569 }
570 } else if (kwds) {
571 /* have keyword args */
572 attrib = get_attrib_from_keywords(kwds);
573 if (!attrib)
574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300576 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 Py_INCREF(Py_None);
578 attrib = Py_None;
579 }
580
Eli Bendersky092af1f2012-03-04 07:14:03 +0200581 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200583 if (elem == NULL)
584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000586 if (element_add_subelement(parent, elem) < 0) {
587 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590
591 return elem;
592}
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594static int
595element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
596{
597 Py_VISIT(self->tag);
598 Py_VISIT(JOIN_OBJ(self->text));
599 Py_VISIT(JOIN_OBJ(self->tail));
600
601 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200602 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300603 Py_VISIT(self->extra->attrib);
604
605 for (i = 0; i < self->extra->length; ++i)
606 Py_VISIT(self->extra->children[i]);
607 }
608 return 0;
609}
610
611static int
612element_gc_clear(ElementObject *self)
613{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700615 _clear_joined_ptr(&self->text);
616 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300617
618 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300619 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 return 0;
623}
624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625static void
626element_dealloc(ElementObject* self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300629
630 if (self->weakreflist != NULL)
631 PyObject_ClearWeakRefs((PyObject *) self);
632
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 /* element_gc_clear clears all references and deallocates extra
634 */
635 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000636
637 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200638 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639}
640
641/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
Serhiy Storchakacb985562015-05-04 15:32:48 +0300643/*[clinic input]
644_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000645
Serhiy Storchakacb985562015-05-04 15:32:48 +0300646 subelement: object(subclass_of='&Element_Type')
647 /
648
649[clinic start generated code]*/
650
651static PyObject *
652_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
653/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
654{
655 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656 return NULL;
657
658 Py_RETURN_NONE;
659}
660
Serhiy Storchakacb985562015-05-04 15:32:48 +0300661/*[clinic input]
662_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
Serhiy Storchakacb985562015-05-04 15:32:48 +0300664[clinic start generated code]*/
665
666static PyObject *
667_elementtree_Element_clear_impl(ElementObject *self)
668/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
669{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
672 Py_INCREF(Py_None);
673 Py_DECREF(JOIN_OBJ(self->text));
674 self->text = Py_None;
675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->tail));
678 self->tail = Py_None;
679
680 Py_RETURN_NONE;
681}
682
Serhiy Storchakacb985562015-05-04 15:32:48 +0300683/*[clinic input]
684_elementtree.Element.__copy__
685
686[clinic start generated code]*/
687
688static PyObject *
689_elementtree_Element___copy___impl(ElementObject *self)
690/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200692 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 ElementObject* element;
694
Eli Bendersky092af1f2012-03-04 07:14:03 +0200695 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800696 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 if (!element)
698 return NULL;
699
700 Py_DECREF(JOIN_OBJ(element->text));
701 element->text = self->text;
702 Py_INCREF(JOIN_OBJ(element->text));
703
704 Py_DECREF(JOIN_OBJ(element->tail));
705 element->tail = self->tail;
706 Py_INCREF(JOIN_OBJ(element->tail));
707
708 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000709 if (element_resize(element, self->extra->length) < 0) {
710 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 for (i = 0; i < self->extra->length; i++) {
715 Py_INCREF(self->extra->children[i]);
716 element->extra->children[i] = self->extra->children[i];
717 }
718
719 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720 }
721
722 return (PyObject*) element;
723}
724
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200725/* Helper for a deep copy. */
726LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
727
Serhiy Storchakacb985562015-05-04 15:32:48 +0300728/*[clinic input]
729_elementtree.Element.__deepcopy__
730
731 memo: object
732 /
733
734[clinic start generated code]*/
735
736static PyObject *
737_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
738/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200740 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741 ElementObject* element;
742 PyObject* tag;
743 PyObject* attrib;
744 PyObject* text;
745 PyObject* tail;
746 PyObject* id;
747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 tag = deepcopy(self->tag, memo);
749 if (!tag)
750 return NULL;
751
752 if (self->extra) {
753 attrib = deepcopy(self->extra->attrib, memo);
754 if (!attrib) {
755 Py_DECREF(tag);
756 return NULL;
757 }
758 } else {
759 Py_INCREF(Py_None);
760 attrib = Py_None;
761 }
762
Eli Bendersky092af1f2012-03-04 07:14:03 +0200763 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764
765 Py_DECREF(tag);
766 Py_DECREF(attrib);
767
768 if (!element)
769 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 text = deepcopy(JOIN_OBJ(self->text), memo);
772 if (!text)
773 goto error;
774 Py_DECREF(element->text);
775 element->text = JOIN_SET(text, JOIN_GET(self->text));
776
777 tail = deepcopy(JOIN_OBJ(self->tail), memo);
778 if (!tail)
779 goto error;
780 Py_DECREF(element->tail);
781 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
782
783 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 if (element_resize(element, self->extra->length) < 0)
785 goto error;
786
787 for (i = 0; i < self->extra->length; i++) {
788 PyObject* child = deepcopy(self->extra->children[i], memo);
789 if (!child) {
790 element->extra->length = i;
791 goto error;
792 }
793 element->extra->children[i] = child;
794 }
795
796 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 }
798
799 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700800 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000801 if (!id)
802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 i = PyDict_SetItem(memo, id, (PyObject*) element);
805
806 Py_DECREF(id);
807
808 if (i < 0)
809 goto error;
810
811 return (PyObject*) element;
812
813 error:
814 Py_DECREF(element);
815 return NULL;
816}
817
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200818LOCAL(PyObject *)
819deepcopy(PyObject *object, PyObject *memo)
820{
821 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200822 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200823 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200824
825 /* Fast paths */
826 if (object == Py_None || PyUnicode_CheckExact(object)) {
827 Py_INCREF(object);
828 return object;
829 }
830
831 if (Py_REFCNT(object) == 1) {
832 if (PyDict_CheckExact(object)) {
833 PyObject *key, *value;
834 Py_ssize_t pos = 0;
835 int simple = 1;
836 while (PyDict_Next(object, &pos, &key, &value)) {
837 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
838 simple = 0;
839 break;
840 }
841 }
842 if (simple)
843 return PyDict_Copy(object);
844 /* Fall through to general case */
845 }
846 else if (Element_CheckExact(object)) {
847 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
848 }
849 }
850
851 /* General case */
852 st = ET_STATE_GLOBAL;
853 if (!st->deepcopy_obj) {
854 PyErr_SetString(PyExc_RuntimeError,
855 "deepcopy helper not found");
856 return NULL;
857 }
858
Victor Stinner7fbac452016-08-20 01:34:44 +0200859 stack[0] = object;
860 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200861 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200862}
863
864
Serhiy Storchakacb985562015-05-04 15:32:48 +0300865/*[clinic input]
866_elementtree.Element.__sizeof__ -> Py_ssize_t
867
868[clinic start generated code]*/
869
870static Py_ssize_t
871_elementtree_Element___sizeof___impl(ElementObject *self)
872/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200873{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200874 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200875 if (self->extra) {
876 result += sizeof(ElementObjectExtra);
877 if (self->extra->children != self->extra->_children)
878 result += sizeof(PyObject*) * self->extra->allocated;
879 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300880 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200881}
882
Eli Bendersky698bdb22013-01-10 06:01:06 -0800883/* dict keys for getstate/setstate. */
#define PICKLED_TAG "tag"             /* state key holding self->tag */
#define PICKLED_CHILDREN "_children"  /* state key holding the list of children */
#define PICKLED_ATTRIB "attrib"       /* state key holding the attribute dict */
#define PICKLED_TAIL "tail"           /* state key holding the tail value */
#define PICKLED_TEXT "text"           /* state key holding the text value */
889
890/* __getstate__ returns a fabricated instance dict as in the pure-Python
891 * Element implementation, for interoperability/interchangeability. This
892 * makes the pure-Python implementation details an API, but (a) there aren't
893 * any unnecessary structures there; and (b) it buys compatibility with 3.2
894 * pickles. See issue #16076.
895 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300896/*[clinic input]
897_elementtree.Element.__getstate__
898
899[clinic start generated code]*/
900
Eli Bendersky698bdb22013-01-10 06:01:06 -0800901static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300902_elementtree_Element___getstate___impl(ElementObject *self)
903/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800904{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200905 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906 PyObject *instancedict = NULL, *children;
907
908 /* Build a list of children. */
909 children = PyList_New(self->extra ? self->extra->length : 0);
910 if (!children)
911 return NULL;
912 for (i = 0; i < PyList_GET_SIZE(children); i++) {
913 PyObject *child = self->extra->children[i];
914 Py_INCREF(child);
915 PyList_SET_ITEM(children, i, child);
916 }
917
918 /* Construct the state object. */
919 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
920 if (noattrib)
921 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
922 PICKLED_TAG, self->tag,
923 PICKLED_CHILDREN, children,
924 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700925 PICKLED_TEXT, JOIN_OBJ(self->text),
926 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800927 else
928 instancedict = Py_BuildValue("{sOsOsOsOsO}",
929 PICKLED_TAG, self->tag,
930 PICKLED_CHILDREN, children,
931 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700932 PICKLED_TEXT, JOIN_OBJ(self->text),
933 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800934 if (instancedict) {
935 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800936 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800937 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 else {
939 for (i = 0; i < PyList_GET_SIZE(children); i++)
940 Py_DECREF(PyList_GET_ITEM(children, i));
941 Py_DECREF(children);
942
943 return NULL;
944 }
945}
946
947static PyObject *
948element_setstate_from_attributes(ElementObject *self,
949 PyObject *tag,
950 PyObject *attrib,
951 PyObject *text,
952 PyObject *tail,
953 PyObject *children)
954{
955 Py_ssize_t i, nchildren;
956
957 if (!tag) {
958 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
959 return NULL;
960 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800961
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200962 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300963 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964
Eli Benderskydd3661e2013-09-13 06:24:25 -0700965 _clear_joined_ptr(&self->text);
966 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
967 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->tail);
970 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
973 /* Handle ATTRIB and CHILDREN. */
974 if (!children && !attrib)
975 Py_RETURN_NONE;
976
977 /* Compute 'nchildren'. */
978 if (children) {
979 if (!PyList_Check(children)) {
980 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
981 return NULL;
982 }
983 nchildren = PyList_Size(children);
984 }
985 else {
986 nchildren = 0;
987 }
988
989 /* Allocate 'extra'. */
990 if (element_resize(self, nchildren)) {
991 return NULL;
992 }
993 assert(self->extra && self->extra->allocated >= nchildren);
994
995 /* Copy children */
996 for (i = 0; i < nchildren; i++) {
997 self->extra->children[i] = PyList_GET_ITEM(children, i);
998 Py_INCREF(self->extra->children[i]);
999 }
1000
1001 self->extra->length = nchildren;
1002 self->extra->allocated = nchildren;
1003
1004 /* Stash attrib. */
1005 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001006 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001007 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008 }
1009
1010 Py_RETURN_NONE;
1011}
1012
1013/* __setstate__ for Element instance from the Python implementation.
1014 * 'state' should be the instance dict.
1015 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001016
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017static PyObject *
1018element_setstate_from_Python(ElementObject *self, PyObject *state)
1019{
1020 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1021 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1022 PyObject *args;
1023 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001024 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001025
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026 tag = attrib = text = tail = children = NULL;
1027 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030
1031 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1032 &attrib, &text, &tail, &children))
1033 retval = element_setstate_from_attributes(self, tag, attrib, text,
1034 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001035 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001036 retval = NULL;
1037
1038 Py_DECREF(args);
1039 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001040}
1041
Serhiy Storchakacb985562015-05-04 15:32:48 +03001042/*[clinic input]
1043_elementtree.Element.__setstate__
1044
1045 state: object
1046 /
1047
1048[clinic start generated code]*/
1049
Eli Bendersky698bdb22013-01-10 06:01:06 -08001050static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001051_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1052/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001053{
1054 if (!PyDict_CheckExact(state)) {
1055 PyErr_Format(PyExc_TypeError,
1056 "Don't know how to unpickle \"%.200R\" as an Element",
1057 state);
1058 return NULL;
1059 }
1060 else
1061 return element_setstate_from_Python(self, state);
1062}
1063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064LOCAL(int)
1065checkpath(PyObject* tag)
1066{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001067 Py_ssize_t i;
1068 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069
1070 /* check if a tag contains an xpath character */
1071
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001072#define PATHCHAR(ch) \
1073 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001076 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1077 void *data = PyUnicode_DATA(tag);
1078 unsigned int kind = PyUnicode_KIND(tag);
1079 for (i = 0; i < len; i++) {
1080 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1081 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001082 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001083 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 return 1;
1087 }
1088 return 0;
1089 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001090 if (PyBytes_Check(tag)) {
1091 char *p = PyBytes_AS_STRING(tag);
1092 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 if (p[i] == '{')
1094 check = 0;
1095 else if (p[i] == '}')
1096 check = 1;
1097 else if (check && PATHCHAR(p[i]))
1098 return 1;
1099 }
1100 return 0;
1101 }
1102
1103 return 1; /* unknown type; might be path expression */
1104}
1105
Serhiy Storchakacb985562015-05-04 15:32:48 +03001106/*[clinic input]
1107_elementtree.Element.extend
1108
1109 elements: object
1110 /
1111
1112[clinic start generated code]*/
1113
1114static PyObject *
1115_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1116/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001117{
1118 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001119 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001120
Serhiy Storchakacb985562015-05-04 15:32:48 +03001121 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122 if (!seq) {
1123 PyErr_Format(
1124 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 );
1127 return NULL;
1128 }
1129
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001130 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001131 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001132 Py_INCREF(element);
1133 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001134 PyErr_Format(
1135 PyExc_TypeError,
1136 "expected an Element, not \"%.200s\"",
1137 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001138 Py_DECREF(seq);
1139 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001140 return NULL;
1141 }
1142
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001143 if (element_add_subelement(self, element) < 0) {
1144 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001145 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001146 return NULL;
1147 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001148 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 }
1150
1151 Py_DECREF(seq);
1152
1153 Py_RETURN_NONE;
1154}
1155
Serhiy Storchakacb985562015-05-04 15:32:48 +03001156/*[clinic input]
1157_elementtree.Element.find
1158
1159 path: object
1160 namespaces: object = None
1161
1162[clinic start generated code]*/
1163
1164static PyObject *
1165_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1166 PyObject *namespaces)
1167/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001169 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001170 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001171
Serhiy Storchakacb985562015-05-04 15:32:48 +03001172 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001173 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001174 return _PyObject_CallMethodIdObjArgs(
1175 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001177 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178
1179 if (!self->extra)
1180 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 for (i = 0; i < self->extra->length; i++) {
1183 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001184 int rc;
1185 if (!Element_CheckExact(item))
1186 continue;
1187 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001188 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001189 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001190 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 Py_DECREF(item);
1192 if (rc < 0)
1193 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 }
1195
1196 Py_RETURN_NONE;
1197}
1198
Serhiy Storchakacb985562015-05-04 15:32:48 +03001199/*[clinic input]
1200_elementtree.Element.findtext
1201
1202 path: object
1203 default: object = None
1204 namespaces: object = None
1205
1206[clinic start generated code]*/
1207
1208static PyObject *
1209_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1210 PyObject *default_value,
1211 PyObject *namespaces)
1212/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001214 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001215 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001216 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001217
Serhiy Storchakacb985562015-05-04 15:32:48 +03001218 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001219 return _PyObject_CallMethodIdObjArgs(
1220 st->elementpath_obj, &PyId_findtext,
1221 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 );
1223
1224 if (!self->extra) {
1225 Py_INCREF(default_value);
1226 return default_value;
1227 }
1228
1229 for (i = 0; i < self->extra->length; i++) {
1230 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001231 int rc;
1232 if (!Element_CheckExact(item))
1233 continue;
1234 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001235 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001236 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001238 if (text == Py_None) {
1239 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001240 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001242 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 return text;
1245 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 Py_DECREF(item);
1247 if (rc < 0)
1248 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 }
1250
1251 Py_INCREF(default_value);
1252 return default_value;
1253}
1254
Serhiy Storchakacb985562015-05-04 15:32:48 +03001255/*[clinic input]
1256_elementtree.Element.findall
1257
1258 path: object
1259 namespaces: object = None
1260
1261[clinic start generated code]*/
1262
1263static PyObject *
1264_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1265 PyObject *namespaces)
1266/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001268 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001270 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001271 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001272
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001273 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001274 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001275 return _PyObject_CallMethodIdObjArgs(
1276 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279
1280 out = PyList_New(0);
1281 if (!out)
1282 return NULL;
1283
1284 if (!self->extra)
1285 return out;
1286
1287 for (i = 0; i < self->extra->length; i++) {
1288 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001289 int rc;
1290 if (!Element_CheckExact(item))
1291 continue;
1292 Py_INCREF(item);
1293 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1294 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1295 Py_DECREF(item);
1296 Py_DECREF(out);
1297 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001299 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300 }
1301
1302 return out;
1303}
1304
Serhiy Storchakacb985562015-05-04 15:32:48 +03001305/*[clinic input]
1306_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308 path: object
1309 namespaces: object = None
1310
1311[clinic start generated code]*/
1312
1313static PyObject *
1314_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1315 PyObject *namespaces)
1316/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1317{
1318 PyObject* tag = path;
1319 _Py_IDENTIFIER(iterfind);
1320 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321
Victor Stinnerf5616342016-12-09 15:26:00 +01001322 return _PyObject_CallMethodIdObjArgs(
1323 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324}
1325
Serhiy Storchakacb985562015-05-04 15:32:48 +03001326/*[clinic input]
1327_elementtree.Element.get
1328
1329 key: object
1330 default: object = None
1331
1332[clinic start generated code]*/
1333
1334static PyObject *
1335_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1336 PyObject *default_value)
1337/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338{
1339 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340
1341 if (!self->extra || self->extra->attrib == Py_None)
1342 value = default_value;
1343 else {
1344 value = PyDict_GetItem(self->extra->attrib, key);
1345 if (!value)
1346 value = default_value;
1347 }
1348
1349 Py_INCREF(value);
1350 return value;
1351}
1352
Serhiy Storchakacb985562015-05-04 15:32:48 +03001353/*[clinic input]
1354_elementtree.Element.getchildren
1355
1356[clinic start generated code]*/
1357
1358static PyObject *
1359_elementtree_Element_getchildren_impl(ElementObject *self)
1360/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001362 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001363 PyObject* list;
1364
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001365 /* FIXME: report as deprecated? */
1366
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 if (!self->extra)
1368 return PyList_New(0);
1369
1370 list = PyList_New(self->extra->length);
1371 if (!list)
1372 return NULL;
1373
1374 for (i = 0; i < self->extra->length; i++) {
1375 PyObject* item = self->extra->children[i];
1376 Py_INCREF(item);
1377 PyList_SET_ITEM(list, i, item);
1378 }
1379
1380 return list;
1381}
1382
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001383
/* Forward declaration, so that Element.iter() and Element.itertext() below
   can call it.  They pass gettext=0 and gettext=1 respectively. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1386
1387
Serhiy Storchakacb985562015-05-04 15:32:48 +03001388/*[clinic input]
1389_elementtree.Element.iter
1390
1391 tag: object = None
1392
1393[clinic start generated code]*/
1394
Eli Bendersky64d11e62012-06-15 07:42:50 +03001395static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001396_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1397/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001398{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001399 if (PyUnicode_Check(tag)) {
1400 if (PyUnicode_READY(tag) < 0)
1401 return NULL;
1402 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1403 tag = Py_None;
1404 }
1405 else if (PyBytes_Check(tag)) {
1406 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1407 tag = Py_None;
1408 }
1409
Eli Bendersky64d11e62012-06-15 07:42:50 +03001410 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001411}
1412
1413
Serhiy Storchakacb985562015-05-04 15:32:48 +03001414/*[clinic input]
1415_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001416
Serhiy Storchakacb985562015-05-04 15:32:48 +03001417[clinic start generated code]*/
1418
1419static PyObject *
1420_elementtree_Element_itertext_impl(ElementObject *self)
1421/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1422{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001423 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424}
1425
Eli Bendersky64d11e62012-06-15 07:42:50 +03001426
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001428element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001430 ElementObject* self = (ElementObject*) self_;
1431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432 if (!self->extra || index < 0 || index >= self->extra->length) {
1433 PyErr_SetString(
1434 PyExc_IndexError,
1435 "child index out of range"
1436 );
1437 return NULL;
1438 }
1439
1440 Py_INCREF(self->extra->children[index]);
1441 return self->extra->children[index];
1442}
1443
Serhiy Storchakacb985562015-05-04 15:32:48 +03001444/*[clinic input]
1445_elementtree.Element.insert
1446
1447 index: Py_ssize_t
1448 subelement: object(subclass_of='&Element_Type')
1449 /
1450
1451[clinic start generated code]*/
1452
1453static PyObject *
1454_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1455 PyObject *subelement)
1456/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001457{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001458 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459
Victor Stinner5f0af232013-07-11 23:01:36 +02001460 if (!self->extra) {
1461 if (create_extra(self, NULL) < 0)
1462 return NULL;
1463 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001465 if (index < 0) {
1466 index += self->extra->length;
1467 if (index < 0)
1468 index = 0;
1469 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001470 if (index > self->extra->length)
1471 index = self->extra->length;
1472
1473 if (element_resize(self, 1) < 0)
1474 return NULL;
1475
1476 for (i = self->extra->length; i > index; i--)
1477 self->extra->children[i] = self->extra->children[i-1];
1478
Serhiy Storchakacb985562015-05-04 15:32:48 +03001479 Py_INCREF(subelement);
1480 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481
1482 self->extra->length++;
1483
1484 Py_RETURN_NONE;
1485}
1486
Serhiy Storchakacb985562015-05-04 15:32:48 +03001487/*[clinic input]
1488_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490[clinic start generated code]*/
1491
1492static PyObject *
1493_elementtree_Element_items_impl(ElementObject *self)
1494/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1495{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496 if (!self->extra || self->extra->attrib == Py_None)
1497 return PyList_New(0);
1498
1499 return PyDict_Items(self->extra->attrib);
1500}
1501
Serhiy Storchakacb985562015-05-04 15:32:48 +03001502/*[clinic input]
1503_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001504
Serhiy Storchakacb985562015-05-04 15:32:48 +03001505[clinic start generated code]*/
1506
1507static PyObject *
1508_elementtree_Element_keys_impl(ElementObject *self)
1509/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1510{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001511 if (!self->extra || self->extra->attrib == Py_None)
1512 return PyList_New(0);
1513
1514 return PyDict_Keys(self->extra->attrib);
1515}
1516
Martin v. Löwis18e16552006-02-15 17:27:45 +00001517static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518element_length(ElementObject* self)
1519{
1520 if (!self->extra)
1521 return 0;
1522
1523 return self->extra->length;
1524}
1525
Serhiy Storchakacb985562015-05-04 15:32:48 +03001526/*[clinic input]
1527_elementtree.Element.makeelement
1528
1529 tag: object
1530 attrib: object
1531 /
1532
1533[clinic start generated code]*/
1534
1535static PyObject *
1536_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1537 PyObject *attrib)
1538/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539{
1540 PyObject* elem;
1541
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542 attrib = PyDict_Copy(attrib);
1543 if (!attrib)
1544 return NULL;
1545
Eli Bendersky092af1f2012-03-04 07:14:03 +02001546 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547
1548 Py_DECREF(attrib);
1549
1550 return elem;
1551}
1552
Serhiy Storchakacb985562015-05-04 15:32:48 +03001553/*[clinic input]
1554_elementtree.Element.remove
1555
1556 subelement: object(subclass_of='&Element_Type')
1557 /
1558
1559[clinic start generated code]*/
1560
1561static PyObject *
1562_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1563/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001565 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001566 int rc;
1567 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 if (!self->extra) {
1570 /* element has no children, so raise exception */
1571 PyErr_SetString(
1572 PyExc_ValueError,
1573 "list.remove(x): x not in list"
1574 );
1575 return NULL;
1576 }
1577
1578 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001579 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001581 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001582 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001584 if (rc < 0)
1585 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 }
1587
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001588 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001589 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 PyErr_SetString(
1591 PyExc_ValueError,
1592 "list.remove(x): x not in list"
1593 );
1594 return NULL;
1595 }
1596
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001597 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598
1599 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600 for (; i < self->extra->length; i++)
1601 self->extra->children[i] = self->extra->children[i+1];
1602
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001603 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 Py_RETURN_NONE;
1605}
1606
1607static PyObject*
1608element_repr(ElementObject* self)
1609{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001610 int status;
1611
1612 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001613 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001614
1615 status = Py_ReprEnter((PyObject *)self);
1616 if (status == 0) {
1617 PyObject *res;
1618 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1619 Py_ReprLeave((PyObject *)self);
1620 return res;
1621 }
1622 if (status > 0)
1623 PyErr_Format(PyExc_RuntimeError,
1624 "reentrant call inside %s.__repr__",
1625 Py_TYPE(self)->tp_name);
1626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627}
1628
Serhiy Storchakacb985562015-05-04 15:32:48 +03001629/*[clinic input]
1630_elementtree.Element.set
1631
1632 key: object
1633 value: object
1634 /
1635
1636[clinic start generated code]*/
1637
1638static PyObject *
1639_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1640 PyObject *value)
1641/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642{
1643 PyObject* attrib;
1644
Victor Stinner5f0af232013-07-11 23:01:36 +02001645 if (!self->extra) {
1646 if (create_extra(self, NULL) < 0)
1647 return NULL;
1648 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649
1650 attrib = element_get_attrib(self);
1651 if (!attrib)
1652 return NULL;
1653
1654 if (PyDict_SetItem(attrib, key, value) < 0)
1655 return NULL;
1656
1657 Py_RETURN_NONE;
1658}
1659
1660static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001661element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001663 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001664 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665 PyObject* old;
1666
1667 if (!self->extra || index < 0 || index >= self->extra->length) {
1668 PyErr_SetString(
1669 PyExc_IndexError,
1670 "child assignment index out of range");
1671 return -1;
1672 }
1673
1674 old = self->extra->children[index];
1675
1676 if (item) {
1677 Py_INCREF(item);
1678 self->extra->children[index] = item;
1679 } else {
1680 self->extra->length--;
1681 for (i = index; i < self->extra->length; i++)
1682 self->extra->children[i] = self->extra->children[i+1];
1683 }
1684
1685 Py_DECREF(old);
1686
1687 return 0;
1688}
1689
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001690static PyObject*
1691element_subscr(PyObject* self_, PyObject* item)
1692{
1693 ElementObject* self = (ElementObject*) self_;
1694
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001695 if (PyIndex_Check(item)) {
1696 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001697
1698 if (i == -1 && PyErr_Occurred()) {
1699 return NULL;
1700 }
1701 if (i < 0 && self->extra)
1702 i += self->extra->length;
1703 return element_getitem(self_, i);
1704 }
1705 else if (PySlice_Check(item)) {
1706 Py_ssize_t start, stop, step, slicelen, cur, i;
1707 PyObject* list;
1708
1709 if (!self->extra)
1710 return PyList_New(0);
1711
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001712 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001713 self->extra->length,
1714 &start, &stop, &step, &slicelen) < 0) {
1715 return NULL;
1716 }
1717
1718 if (slicelen <= 0)
1719 return PyList_New(0);
1720 else {
1721 list = PyList_New(slicelen);
1722 if (!list)
1723 return NULL;
1724
1725 for (cur = start, i = 0; i < slicelen;
1726 cur += step, i++) {
1727 PyObject* item = self->extra->children[cur];
1728 Py_INCREF(item);
1729 PyList_SET_ITEM(list, i, item);
1730 }
1731
1732 return list;
1733 }
1734 }
1735 else {
1736 PyErr_SetString(PyExc_TypeError,
1737 "element indices must be integers");
1738 return NULL;
1739 }
1740}
1741
1742static int
1743element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1744{
1745 ElementObject* self = (ElementObject*) self_;
1746
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001747 if (PyIndex_Check(item)) {
1748 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001749
1750 if (i == -1 && PyErr_Occurred()) {
1751 return -1;
1752 }
1753 if (i < 0 && self->extra)
1754 i += self->extra->length;
1755 return element_setitem(self_, i, value);
1756 }
1757 else if (PySlice_Check(item)) {
1758 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1759
1760 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001761 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001762
Victor Stinner5f0af232013-07-11 23:01:36 +02001763 if (!self->extra) {
1764 if (create_extra(self, NULL) < 0)
1765 return -1;
1766 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001767
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001768 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769 self->extra->length,
1770 &start, &stop, &step, &slicelen) < 0) {
1771 return -1;
1772 }
1773
Eli Bendersky865756a2012-03-09 13:38:15 +02001774 if (value == NULL) {
1775 /* Delete slice */
1776 size_t cur;
1777 Py_ssize_t i;
1778
1779 if (slicelen <= 0)
1780 return 0;
1781
1782 /* Since we're deleting, the direction of the range doesn't matter,
1783 * so for simplicity make it always ascending.
1784 */
1785 if (step < 0) {
1786 stop = start + 1;
1787 start = stop + step * (slicelen - 1) - 1;
1788 step = -step;
1789 }
1790
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001791 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001792
1793 /* recycle is a list that will contain all the children
1794 * scheduled for removal.
1795 */
1796 if (!(recycle = PyList_New(slicelen))) {
1797 PyErr_NoMemory();
1798 return -1;
1799 }
1800
1801 /* This loop walks over all the children that have to be deleted,
1802 * with cur pointing at them. num_moved is the amount of children
1803 * until the next deleted child that have to be "shifted down" to
1804 * occupy the deleted's places.
1805 * Note that in the ith iteration, shifting is done i+i places down
1806 * because i children were already removed.
1807 */
1808 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1809 /* Compute how many children have to be moved, clipping at the
1810 * list end.
1811 */
1812 Py_ssize_t num_moved = step - 1;
1813 if (cur + step >= (size_t)self->extra->length) {
1814 num_moved = self->extra->length - cur - 1;
1815 }
1816
1817 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1818
1819 memmove(
1820 self->extra->children + cur - i,
1821 self->extra->children + cur + 1,
1822 num_moved * sizeof(PyObject *));
1823 }
1824
1825 /* Leftover "tail" after the last removed child */
1826 cur = start + (size_t)slicelen * step;
1827 if (cur < (size_t)self->extra->length) {
1828 memmove(
1829 self->extra->children + cur - slicelen,
1830 self->extra->children + cur,
1831 (self->extra->length - cur) * sizeof(PyObject *));
1832 }
1833
1834 self->extra->length -= slicelen;
1835
1836 /* Discard the recycle list with all the deleted sub-elements */
1837 Py_XDECREF(recycle);
1838 return 0;
1839 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001840
1841 /* A new slice is actually being assigned */
1842 seq = PySequence_Fast(value, "");
1843 if (!seq) {
1844 PyErr_Format(
1845 PyExc_TypeError,
1846 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1847 );
1848 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001849 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001850 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001851
1852 if (step != 1 && newlen != slicelen)
1853 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001854 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001856 "attempt to assign sequence of size %zd "
1857 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001858 newlen, slicelen
1859 );
1860 return -1;
1861 }
1862
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001863 /* Resize before creating the recycle bin, to prevent refleaks. */
1864 if (newlen > slicelen) {
1865 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001866 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001867 return -1;
1868 }
1869 }
1870
1871 if (slicelen > 0) {
1872 /* to avoid recursive calls to this method (via decref), move
1873 old items to the recycle bin here, and get rid of them when
1874 we're done modifying the element */
1875 recycle = PyList_New(slicelen);
1876 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001877 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001878 return -1;
1879 }
1880 for (cur = start, i = 0; i < slicelen;
1881 cur += step, i++)
1882 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1883 }
1884
1885 if (newlen < slicelen) {
1886 /* delete slice */
1887 for (i = stop; i < self->extra->length; i++)
1888 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1889 } else if (newlen > slicelen) {
1890 /* insert slice */
1891 for (i = self->extra->length-1; i >= stop; i--)
1892 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1893 }
1894
1895 /* replace the slice */
1896 for (cur = start, i = 0; i < newlen;
1897 cur += step, i++) {
1898 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1899 Py_INCREF(element);
1900 self->extra->children[cur] = element;
1901 }
1902
1903 self->extra->length += newlen - slicelen;
1904
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001905 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001906
1907 /* discard the recycle bin, and everything in it */
1908 Py_XDECREF(recycle);
1909
1910 return 0;
1911 }
1912 else {
1913 PyErr_SetString(PyExc_TypeError,
1914 "element indices must be integers");
1915 return -1;
1916 }
1917}
1918
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001920element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001921{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001922 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001923 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924 return res;
1925}
1926
Serhiy Storchakadde08152015-11-25 15:28:13 +02001927static PyObject*
1928element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001929{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001930 PyObject *res = element_get_text(self);
1931 Py_XINCREF(res);
1932 return res;
1933}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001934
Serhiy Storchakadde08152015-11-25 15:28:13 +02001935static PyObject*
1936element_tail_getter(ElementObject *self, void *closure)
1937{
1938 PyObject *res = element_get_tail(self);
1939 Py_XINCREF(res);
1940 return res;
1941}
1942
1943static PyObject*
1944element_attrib_getter(ElementObject *self, void *closure)
1945{
1946 PyObject *res;
1947 if (!self->extra) {
1948 if (create_extra(self, NULL) < 0)
1949 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001950 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001951 res = element_get_attrib(self);
1952 Py_XINCREF(res);
1953 return res;
1954}
Victor Stinner4d463432013-07-11 23:05:03 +02001955
/* macro for setter validation
 *
 * A setter is called with a NULL value when the attribute is being
 * deleted; this rejects that with AttributeError and makes the *enclosing
 * function* return -1.  Wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside an unbraced if/else); use it with a
 * trailing semicolon.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1964
Serhiy Storchakadde08152015-11-25 15:28:13 +02001965static int
1966element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1967{
1968 _VALIDATE_ATTR_VALUE(value);
1969 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001970 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001971 return 0;
1972}
1973
1974static int
1975element_text_setter(ElementObject *self, PyObject *value, void *closure)
1976{
1977 _VALIDATE_ATTR_VALUE(value);
1978 Py_INCREF(value);
1979 Py_DECREF(JOIN_OBJ(self->text));
1980 self->text = value;
1981 return 0;
1982}
1983
1984static int
1985element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1986{
1987 _VALIDATE_ATTR_VALUE(value);
1988 Py_INCREF(value);
1989 Py_DECREF(JOIN_OBJ(self->tail));
1990 self->tail = value;
1991 return 0;
1992}
1993
1994static int
1995element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1996{
1997 _VALIDATE_ATTR_VALUE(value);
1998 if (!self->extra) {
1999 if (create_extra(self, NULL) < 0)
2000 return -1;
2001 }
2002 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002003 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002004 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002005}
2006
2007static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002008 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002009 0, /* sq_concat */
2010 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002012 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002013 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002014 0,
2015};
2016
Eli Bendersky64d11e62012-06-15 07:42:50 +03002017/******************************* Element iterator ****************************/
2018
2019/* ElementIterObject represents the iteration state over an XML element in
2020 * pre-order traversal. To keep track of which sub-element should be returned
2021 * next, a stack of parents is maintained. This is a standard stack-based
2022 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002023 * The stack is managed using a continuous array.
2024 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002025 * the current one is exhausted, and the next child to examine in that parent.
2026 */
2027typedef struct ParentLocator_t {
2028 ElementObject *parent;
2029 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002030} ParentLocator;
2031
2032typedef struct {
2033 PyObject_HEAD
2034 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002035 Py_ssize_t parent_stack_used;
2036 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002037 ElementObject *root_element;
2038 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002039 int gettext;
2040} ElementIterObject;
2041
2042
2043static void
2044elementiter_dealloc(ElementIterObject *it)
2045{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002046 Py_ssize_t i = it->parent_stack_used;
2047 it->parent_stack_used = 0;
2048 while (i--)
2049 Py_XDECREF(it->parent_stack[i].parent);
2050 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002051
2052 Py_XDECREF(it->sought_tag);
2053 Py_XDECREF(it->root_element);
2054
2055 PyObject_GC_UnTrack(it);
2056 PyObject_GC_Del(it);
2057}
2058
2059static int
2060elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2061{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002062 Py_ssize_t i = it->parent_stack_used;
2063 while (i--)
2064 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002065
2066 Py_VISIT(it->root_element);
2067 Py_VISIT(it->sought_tag);
2068 return 0;
2069}
2070
2071/* Helper function for elementiter_next. Add a new parent to the parent stack.
2072 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002073static int
2074parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002075{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002076 ParentLocator *item;
2077
2078 if (it->parent_stack_used >= it->parent_stack_size) {
2079 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2080 ParentLocator *parent_stack = it->parent_stack;
2081 PyMem_Resize(parent_stack, ParentLocator, new_size);
2082 if (parent_stack == NULL)
2083 return -1;
2084 it->parent_stack = parent_stack;
2085 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002086 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002087 item = it->parent_stack + it->parent_stack_used++;
2088 Py_INCREF(parent);
2089 item->parent = parent;
2090 item->child_index = 0;
2091 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002092}
2093
2094static PyObject *
2095elementiter_next(ElementIterObject *it)
2096{
2097 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002098 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099 * A short note on gettext: this function serves both the iter() and
2100 * itertext() methods to avoid code duplication. However, there are a few
2101 * small differences in the way these iterations work. Namely:
2102 * - itertext() only yields text from nodes that have it, and continues
2103 * iterating when a node doesn't have text (so it doesn't return any
2104 * node like iter())
2105 * - itertext() also has to handle tail, after finishing with all the
2106 * children of a node.
2107 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002108 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002109 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002110 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111
2112 while (1) {
2113 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002114 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115 * iterator is exhausted.
2116 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002117 if (!it->parent_stack_used) {
2118 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119 PyErr_SetNone(PyExc_StopIteration);
2120 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 }
2122
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002123 elem = it->root_element; /* steals a reference */
2124 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002125 }
2126 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002127 /* See if there are children left to traverse in the current parent. If
2128 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002129 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002130 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2131 Py_ssize_t child_index = item->child_index;
2132 ElementObjectExtra *extra;
2133 elem = item->parent;
2134 extra = elem->extra;
2135 if (!extra || child_index >= extra->length) {
2136 it->parent_stack_used--;
2137 /* Note that extra condition on it->parent_stack_used here;
2138 * this is because itertext() is supposed to only return *inner*
2139 * text, not text following the element it began iteration with.
2140 */
2141 if (it->gettext && it->parent_stack_used) {
2142 text = element_get_tail(elem);
2143 goto gettext;
2144 }
2145 Py_DECREF(elem);
2146 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002147 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002148
2149 elem = (ElementObject *)extra->children[child_index];
2150 item->child_index++;
2151 Py_INCREF(elem);
2152 }
2153
2154 if (parent_stack_push_new(it, elem) < 0) {
2155 Py_DECREF(elem);
2156 PyErr_NoMemory();
2157 return NULL;
2158 }
2159 if (it->gettext) {
2160 text = element_get_text(elem);
2161 goto gettext;
2162 }
2163
2164 if (it->sought_tag == Py_None)
2165 return (PyObject *)elem;
2166
2167 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2168 if (rc > 0)
2169 return (PyObject *)elem;
2170
2171 Py_DECREF(elem);
2172 if (rc < 0)
2173 return NULL;
2174 continue;
2175
2176gettext:
2177 if (!text) {
2178 Py_DECREF(elem);
2179 return NULL;
2180 }
2181 if (text == Py_None) {
2182 Py_DECREF(elem);
2183 }
2184 else {
2185 Py_INCREF(text);
2186 Py_DECREF(elem);
2187 rc = PyObject_IsTrue(text);
2188 if (rc > 0)
2189 return text;
2190 Py_DECREF(text);
2191 if (rc < 0)
2192 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002193 }
2194 }
2195
2196 return NULL;
2197}
2198
2199
2200static PyTypeObject ElementIter_Type = {
2201 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002202 /* Using the module's name since the pure-Python implementation does not
2203 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002204 "_elementtree._element_iterator", /* tp_name */
2205 sizeof(ElementIterObject), /* tp_basicsize */
2206 0, /* tp_itemsize */
2207 /* methods */
2208 (destructor)elementiter_dealloc, /* tp_dealloc */
2209 0, /* tp_print */
2210 0, /* tp_getattr */
2211 0, /* tp_setattr */
2212 0, /* tp_reserved */
2213 0, /* tp_repr */
2214 0, /* tp_as_number */
2215 0, /* tp_as_sequence */
2216 0, /* tp_as_mapping */
2217 0, /* tp_hash */
2218 0, /* tp_call */
2219 0, /* tp_str */
2220 0, /* tp_getattro */
2221 0, /* tp_setattro */
2222 0, /* tp_as_buffer */
2223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2224 0, /* tp_doc */
2225 (traverseproc)elementiter_traverse, /* tp_traverse */
2226 0, /* tp_clear */
2227 0, /* tp_richcompare */
2228 0, /* tp_weaklistoffset */
2229 PyObject_SelfIter, /* tp_iter */
2230 (iternextfunc)elementiter_next, /* tp_iternext */
2231 0, /* tp_methods */
2232 0, /* tp_members */
2233 0, /* tp_getset */
2234 0, /* tp_base */
2235 0, /* tp_dict */
2236 0, /* tp_descr_get */
2237 0, /* tp_descr_set */
2238 0, /* tp_dictoffset */
2239 0, /* tp_init */
2240 0, /* tp_alloc */
2241 0, /* tp_new */
2242};
2243
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002244#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002245
2246static PyObject *
2247create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2248{
2249 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002250
2251 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2252 if (!it)
2253 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002254
Victor Stinner4d463432013-07-11 23:05:03 +02002255 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002256 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002258 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002259 it->root_element = self;
2260
Eli Bendersky64d11e62012-06-15 07:42:50 +03002261 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002262
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002263 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002264 if (it->parent_stack == NULL) {
2265 Py_DECREF(it);
2266 PyErr_NoMemory();
2267 return NULL;
2268 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002269 it->parent_stack_used = 0;
2270 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002271
Eli Bendersky64d11e62012-06-15 07:42:50 +03002272 return (PyObject *)it;
2273}
2274
2275
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276/* ==================================================================== */
2277/* the tree builder type */
2278
2279typedef struct {
2280 PyObject_HEAD
2281
Eli Bendersky58d548d2012-05-29 15:45:16 +03002282 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002283
Antoine Pitrouee329312012-10-04 19:53:29 +02002284 PyObject *this; /* current node */
2285 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286
Eli Bendersky58d548d2012-05-29 15:45:16 +03002287 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288
Eli Bendersky58d548d2012-05-29 15:45:16 +03002289 PyObject *stack; /* element stack */
2290 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291
Eli Bendersky48d358b2012-05-30 17:57:50 +03002292 PyObject *element_factory;
2293
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002294 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002295 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002296 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2297 PyObject *end_event_obj;
2298 PyObject *start_ns_event_obj;
2299 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300} TreeBuilderObject;
2301
Christian Heimes90aa7642007-12-19 02:45:37 +00002302#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002303
2304/* -------------------------------------------------------------------- */
2305/* constructor and destructor */
2306
Eli Bendersky58d548d2012-05-29 15:45:16 +03002307static PyObject *
2308treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002310 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2311 if (t != NULL) {
2312 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002313
Eli Bendersky58d548d2012-05-29 15:45:16 +03002314 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002315 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002317 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002318
Eli Bendersky58d548d2012-05-29 15:45:16 +03002319 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002320 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002321 t->stack = PyList_New(20);
2322 if (!t->stack) {
2323 Py_DECREF(t->this);
2324 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002325 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002326 return NULL;
2327 }
2328 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002329
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002330 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002331 t->start_event_obj = t->end_event_obj = NULL;
2332 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2333 }
2334 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335}
2336
Serhiy Storchakacb985562015-05-04 15:32:48 +03002337/*[clinic input]
2338_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002339
Serhiy Storchakacb985562015-05-04 15:32:48 +03002340 element_factory: object = NULL
2341
2342[clinic start generated code]*/
2343
2344static int
2345_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2346 PyObject *element_factory)
2347/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2348{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002349 if (element_factory) {
2350 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002351 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002352 }
2353
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355}
2356
Eli Bendersky48d358b2012-05-30 17:57:50 +03002357static int
2358treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2359{
2360 Py_VISIT(self->root);
2361 Py_VISIT(self->this);
2362 Py_VISIT(self->last);
2363 Py_VISIT(self->data);
2364 Py_VISIT(self->stack);
2365 Py_VISIT(self->element_factory);
2366 return 0;
2367}
2368
2369static int
2370treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002372 Py_CLEAR(self->end_ns_event_obj);
2373 Py_CLEAR(self->start_ns_event_obj);
2374 Py_CLEAR(self->end_event_obj);
2375 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002376 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002377 Py_CLEAR(self->stack);
2378 Py_CLEAR(self->data);
2379 Py_CLEAR(self->last);
2380 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002381 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002382 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002383 return 0;
2384}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386static void
2387treebuilder_dealloc(TreeBuilderObject *self)
2388{
2389 PyObject_GC_UnTrack(self);
2390 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002391 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392}
2393
2394/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002395/* helpers for handling of arbitrary element-like objects */
2396
2397static int
2398treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2399 PyObject **dest, _Py_Identifier *name)
2400{
2401 if (Element_CheckExact(element)) {
2402 Py_DECREF(JOIN_OBJ(*dest));
2403 *dest = JOIN_SET(data, PyList_CheckExact(data));
2404 return 0;
2405 }
2406 else {
2407 PyObject *joined = list_join(data);
2408 int r;
2409 if (joined == NULL)
2410 return -1;
2411 r = _PyObject_SetAttrId(element, name, joined);
2412 Py_DECREF(joined);
2413 return r;
2414 }
2415}
2416
2417/* These two functions steal a reference to data */
2418static int
2419treebuilder_set_element_text(PyObject *element, PyObject *data)
2420{
2421 _Py_IDENTIFIER(text);
2422 return treebuilder_set_element_text_or_tail(
2423 element, data, &((ElementObject *) element)->text, &PyId_text);
2424}
2425
2426static int
2427treebuilder_set_element_tail(PyObject *element, PyObject *data)
2428{
2429 _Py_IDENTIFIER(tail);
2430 return treebuilder_set_element_text_or_tail(
2431 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2432}
2433
2434static int
2435treebuilder_add_subelement(PyObject *element, PyObject *child)
2436{
2437 _Py_IDENTIFIER(append);
2438 if (Element_CheckExact(element)) {
2439 ElementObject *elem = (ElementObject *) element;
2440 return element_add_subelement(elem, child);
2441 }
2442 else {
2443 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002444 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002445 if (res == NULL)
2446 return -1;
2447 Py_DECREF(res);
2448 return 0;
2449 }
2450}
2451
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002452LOCAL(int)
2453treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2454 PyObject *node)
2455{
2456 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002457 PyObject *res;
2458 PyObject *event = PyTuple_Pack(2, action, node);
2459 if (event == NULL)
2460 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002461 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002462 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002463 if (res == NULL)
2464 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002465 Py_DECREF(res);
2466 }
2467 return 0;
2468}
2469
Antoine Pitrouee329312012-10-04 19:53:29 +02002470/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471/* handlers */
2472
2473LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2475 PyObject* attrib)
2476{
2477 PyObject* node;
2478 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002479 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480
2481 if (self->data) {
2482 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002483 if (treebuilder_set_element_text(self->last, self->data))
2484 return NULL;
2485 }
2486 else {
2487 if (treebuilder_set_element_tail(self->last, self->data))
2488 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 }
2490 self->data = NULL;
2491 }
2492
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002493 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002494 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002495 } else if (attrib == Py_None) {
2496 attrib = PyDict_New();
2497 if (!attrib)
2498 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002499 node = PyObject_CallFunctionObjArgs(self->element_factory,
2500 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002501 Py_DECREF(attrib);
2502 }
2503 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002504 node = PyObject_CallFunctionObjArgs(self->element_factory,
2505 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002506 }
2507 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002509 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510
Antoine Pitrouee329312012-10-04 19:53:29 +02002511 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512
2513 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002514 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002515 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002516 } else {
2517 if (self->root) {
2518 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002519 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520 "multiple elements on top level"
2521 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002522 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 }
2524 Py_INCREF(node);
2525 self->root = node;
2526 }
2527
2528 if (self->index < PyList_GET_SIZE(self->stack)) {
2529 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002530 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531 Py_INCREF(this);
2532 } else {
2533 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002534 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 }
2536 self->index++;
2537
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002539 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002541 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002543 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2544 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545
2546 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002547
2548 error:
2549 Py_DECREF(node);
2550 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551}
2552
2553LOCAL(PyObject*)
2554treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2555{
2556 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002557 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002558 /* ignore calls to data before the first call to start */
2559 Py_RETURN_NONE;
2560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 /* store the first item as is */
2562 Py_INCREF(data); self->data = data;
2563 } else {
2564 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002565 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2566 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002567 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568 /* expat often generates single character data sections; handle
2569 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002570 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2571 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002573 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 } else if (PyList_CheckExact(self->data)) {
2575 if (PyList_Append(self->data, data) < 0)
2576 return NULL;
2577 } else {
2578 PyObject* list = PyList_New(2);
2579 if (!list)
2580 return NULL;
2581 PyList_SET_ITEM(list, 0, self->data);
2582 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2583 self->data = list;
2584 }
2585 }
2586
2587 Py_RETURN_NONE;
2588}
2589
2590LOCAL(PyObject*)
2591treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2592{
2593 PyObject* item;
2594
2595 if (self->data) {
2596 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002597 if (treebuilder_set_element_text(self->last, self->data))
2598 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002600 if (treebuilder_set_element_tail(self->last, self->data))
2601 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602 }
2603 self->data = NULL;
2604 }
2605
2606 if (self->index == 0) {
2607 PyErr_SetString(
2608 PyExc_IndexError,
2609 "pop from empty stack"
2610 );
2611 return NULL;
2612 }
2613
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002614 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002615 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002616 self->index--;
2617 self->this = PyList_GET_ITEM(self->stack, self->index);
2618 Py_INCREF(self->this);
2619 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002621 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2622 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623
2624 Py_INCREF(self->last);
2625 return (PyObject*) self->last;
2626}
2627
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628/* -------------------------------------------------------------------- */
2629/* methods (in alphabetical order) */
2630
Serhiy Storchakacb985562015-05-04 15:32:48 +03002631/*[clinic input]
2632_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633
Serhiy Storchakacb985562015-05-04 15:32:48 +03002634 data: object
2635 /
2636
2637[clinic start generated code]*/
2638
2639static PyObject *
2640_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2641/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2642{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 return treebuilder_handle_data(self, data);
2644}
2645
Serhiy Storchakacb985562015-05-04 15:32:48 +03002646/*[clinic input]
2647_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648
Serhiy Storchakacb985562015-05-04 15:32:48 +03002649 tag: object
2650 /
2651
2652[clinic start generated code]*/
2653
2654static PyObject *
2655_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2656/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2657{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658 return treebuilder_handle_end(self, tag);
2659}
2660
2661LOCAL(PyObject*)
2662treebuilder_done(TreeBuilderObject* self)
2663{
2664 PyObject* res;
2665
2666 /* FIXME: check stack size? */
2667
2668 if (self->root)
2669 res = self->root;
2670 else
2671 res = Py_None;
2672
2673 Py_INCREF(res);
2674 return res;
2675}
2676
Serhiy Storchakacb985562015-05-04 15:32:48 +03002677/*[clinic input]
2678_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002679
Serhiy Storchakacb985562015-05-04 15:32:48 +03002680[clinic start generated code]*/
2681
2682static PyObject *
2683_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2684/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2685{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686 return treebuilder_done(self);
2687}
2688
Serhiy Storchakacb985562015-05-04 15:32:48 +03002689/*[clinic input]
2690_elementtree.TreeBuilder.start
2691
2692 tag: object
2693 attrs: object = None
2694 /
2695
2696[clinic start generated code]*/
2697
2698static PyObject *
2699_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2700 PyObject *attrs)
2701/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002703 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704}
2705
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706/* ==================================================================== */
2707/* the expat interface */
2708
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002711
2712/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2713 * cached globally without being in per-module state.
2714 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002715static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717
Eli Bendersky52467b12012-06-01 07:13:08 +03002718static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2719 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721typedef struct {
2722 PyObject_HEAD
2723
2724 XML_Parser parser;
2725
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002726 PyObject *target;
2727 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002729 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002731 PyObject *handle_start;
2732 PyObject *handle_data;
2733 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002735 PyObject *handle_comment;
2736 PyObject *handle_pi;
2737 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002739 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002740
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741} XMLParserObject;
2742
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002743static PyObject*
2744_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2745static PyObject *
2746_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2747 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002748
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749/* helpers */
2750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751LOCAL(PyObject*)
2752makeuniversal(XMLParserObject* self, const char* string)
2753{
2754 /* convert a UTF-8 tag/attribute name from the expat parser
2755 to a universal name string */
2756
Antoine Pitrouc1948842012-10-01 23:40:37 +02002757 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758 PyObject* key;
2759 PyObject* value;
2760
2761 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002762 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763 if (!key)
2764 return NULL;
2765
2766 value = PyDict_GetItem(self->names, key);
2767
2768 if (value) {
2769 Py_INCREF(value);
2770 } else {
2771 /* new name. convert to universal name, and decode as
2772 necessary */
2773
2774 PyObject* tag;
2775 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002776 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777
2778 /* look for namespace separator */
2779 for (i = 0; i < size; i++)
2780 if (string[i] == '}')
2781 break;
2782 if (i != size) {
2783 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002784 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002785 if (tag == NULL) {
2786 Py_DECREF(key);
2787 return NULL;
2788 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002789 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002790 p[0] = '{';
2791 memcpy(p+1, string, size);
2792 size++;
2793 } else {
2794 /* plain name; use key as tag */
2795 Py_INCREF(key);
2796 tag = key;
2797 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002798
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002800 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002801 value = PyUnicode_DecodeUTF8(p, size, "strict");
2802 Py_DECREF(tag);
2803 if (!value) {
2804 Py_DECREF(key);
2805 return NULL;
2806 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807
2808 /* add to names dictionary */
2809 if (PyDict_SetItem(self->names, key, value) < 0) {
2810 Py_DECREF(key);
2811 Py_DECREF(value);
2812 return NULL;
2813 }
2814 }
2815
2816 Py_DECREF(key);
2817 return value;
2818}
2819
Eli Bendersky5b77d812012-03-16 08:20:05 +02002820/* Set the ParseError exception with the given parameters.
2821 * If message is not NULL, it's used as the error string. Otherwise, the
2822 * message string is the default for the given error_code.
2823*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002825expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2826 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002827{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002828 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002829 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002830
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002831 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002832 message ? message : EXPAT(ErrorString)(error_code),
2833 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002834 if (errmsg == NULL)
2835 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002836
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002837 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002838 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002839 if (!error)
2840 return;
2841
Eli Bendersky5b77d812012-03-16 08:20:05 +02002842 /* Add code and position attributes */
2843 code = PyLong_FromLong((long)error_code);
2844 if (!code) {
2845 Py_DECREF(error);
2846 return;
2847 }
2848 if (PyObject_SetAttrString(error, "code", code) == -1) {
2849 Py_DECREF(error);
2850 Py_DECREF(code);
2851 return;
2852 }
2853 Py_DECREF(code);
2854
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002855 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002856 if (!position) {
2857 Py_DECREF(error);
2858 return;
2859 }
2860 if (PyObject_SetAttrString(error, "position", position) == -1) {
2861 Py_DECREF(error);
2862 Py_DECREF(position);
2863 return;
2864 }
2865 Py_DECREF(position);
2866
Eli Bendersky532d03e2013-08-10 08:00:39 -07002867 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002868 Py_DECREF(error);
2869}
2870
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002871/* -------------------------------------------------------------------- */
2872/* handlers */
2873
2874static void
2875expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2876 int data_len)
2877{
2878 PyObject* key;
2879 PyObject* value;
2880 PyObject* res;
2881
2882 if (data_len < 2 || data_in[0] != '&')
2883 return;
2884
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002885 if (PyErr_Occurred())
2886 return;
2887
Neal Norwitz0269b912007-08-08 06:56:02 +00002888 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002889 if (!key)
2890 return;
2891
2892 value = PyDict_GetItem(self->entity, key);
2893
2894 if (value) {
2895 if (TreeBuilder_CheckExact(self->target))
2896 res = treebuilder_handle_data(
2897 (TreeBuilderObject*) self->target, value
2898 );
2899 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002900 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901 else
2902 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002903 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002904 } else if (!PyErr_Occurred()) {
2905 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002906 char message[128] = "undefined entity ";
2907 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002908 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002909 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002911 EXPAT(GetErrorColumnNumber)(self->parser),
2912 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913 );
2914 }
2915
2916 Py_DECREF(key);
2917}
2918
2919static void
2920expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2921 const XML_Char **attrib_in)
2922{
2923 PyObject* res;
2924 PyObject* tag;
2925 PyObject* attrib;
2926 int ok;
2927
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002928 if (PyErr_Occurred())
2929 return;
2930
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 /* tag name */
2932 tag = makeuniversal(self, tag_in);
2933 if (!tag)
2934 return; /* parser will look for errors */
2935
2936 /* attributes */
2937 if (attrib_in[0]) {
2938 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002939 if (!attrib) {
2940 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002942 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 while (attrib_in[0] && attrib_in[1]) {
2944 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002945 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 if (!key || !value) {
2947 Py_XDECREF(value);
2948 Py_XDECREF(key);
2949 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002950 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002951 return;
2952 }
2953 ok = PyDict_SetItem(attrib, key, value);
2954 Py_DECREF(value);
2955 Py_DECREF(key);
2956 if (ok < 0) {
2957 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002958 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959 return;
2960 }
2961 attrib_in += 2;
2962 }
2963 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002964 Py_INCREF(Py_None);
2965 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002966 }
2967
2968 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 /* shortcut */
2970 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2971 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002972 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002973 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002974 if (attrib == Py_None) {
2975 Py_DECREF(attrib);
2976 attrib = PyDict_New();
2977 if (!attrib) {
2978 Py_DECREF(tag);
2979 return;
2980 }
2981 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002982 res = PyObject_CallFunctionObjArgs(self->handle_start,
2983 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002984 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 res = NULL;
2986
2987 Py_DECREF(tag);
2988 Py_DECREF(attrib);
2989
2990 Py_XDECREF(res);
2991}
2992
2993static void
2994expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2995 int data_len)
2996{
2997 PyObject* data;
2998 PyObject* res;
2999
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003000 if (PyErr_Occurred())
3001 return;
3002
Neal Norwitz0269b912007-08-08 06:56:02 +00003003 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003004 if (!data)
3005 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006
3007 if (TreeBuilder_CheckExact(self->target))
3008 /* shortcut */
3009 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3010 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003011 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003012 else
3013 res = NULL;
3014
3015 Py_DECREF(data);
3016
3017 Py_XDECREF(res);
3018}
3019
3020static void
3021expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3022{
3023 PyObject* tag;
3024 PyObject* res = NULL;
3025
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003026 if (PyErr_Occurred())
3027 return;
3028
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029 if (TreeBuilder_CheckExact(self->target))
3030 /* shortcut */
3031 /* the standard tree builder doesn't look at the end tag */
3032 res = treebuilder_handle_end(
3033 (TreeBuilderObject*) self->target, Py_None
3034 );
3035 else if (self->handle_end) {
3036 tag = makeuniversal(self, tag_in);
3037 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003038 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 Py_DECREF(tag);
3040 }
3041 }
3042
3043 Py_XDECREF(res);
3044}
3045
3046static void
3047expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3048 const XML_Char *uri)
3049{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003050 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3051 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003052
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003053 if (PyErr_Occurred())
3054 return;
3055
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003056 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003057 return;
3058
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003059 if (!uri)
3060 uri = "";
3061 if (!prefix)
3062 prefix = "";
3063
3064 parcel = Py_BuildValue("ss", prefix, uri);
3065 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003066 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003067 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3068 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069}
3070
3071static void
3072expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3073{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003074 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3075
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003076 if (PyErr_Occurred())
3077 return;
3078
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003079 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003080 return;
3081
3082 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083}
3084
3085static void
3086expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3087{
3088 PyObject* comment;
3089 PyObject* res;
3090
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003091 if (PyErr_Occurred())
3092 return;
3093
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003095 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003097 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3098 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099 Py_XDECREF(res);
3100 Py_DECREF(comment);
3101 }
3102 }
3103}
3104
Eli Bendersky45839902013-01-13 05:14:47 -08003105static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003106expat_start_doctype_handler(XMLParserObject *self,
3107 const XML_Char *doctype_name,
3108 const XML_Char *sysid,
3109 const XML_Char *pubid,
3110 int has_internal_subset)
3111{
3112 PyObject *self_pyobj = (PyObject *)self;
3113 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3114 PyObject *parser_doctype = NULL;
3115 PyObject *res = NULL;
3116
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003117 if (PyErr_Occurred())
3118 return;
3119
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003120 doctype_name_obj = makeuniversal(self, doctype_name);
3121 if (!doctype_name_obj)
3122 return;
3123
3124 if (sysid) {
3125 sysid_obj = makeuniversal(self, sysid);
3126 if (!sysid_obj) {
3127 Py_DECREF(doctype_name_obj);
3128 return;
3129 }
3130 } else {
3131 Py_INCREF(Py_None);
3132 sysid_obj = Py_None;
3133 }
3134
3135 if (pubid) {
3136 pubid_obj = makeuniversal(self, pubid);
3137 if (!pubid_obj) {
3138 Py_DECREF(doctype_name_obj);
3139 Py_DECREF(sysid_obj);
3140 return;
3141 }
3142 } else {
3143 Py_INCREF(Py_None);
3144 pubid_obj = Py_None;
3145 }
3146
3147 /* If the target has a handler for doctype, call it. */
3148 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003149 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3150 doctype_name_obj, pubid_obj,
3151 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003152 Py_CLEAR(res);
3153 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003154 else {
3155 /* Now see if the parser itself has a doctype method. If yes and it's
3156 * a custom method, call it but warn about deprecation. If it's only
3157 * the vanilla XMLParser method, do nothing.
3158 */
3159 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3160 if (parser_doctype &&
3161 !(PyCFunction_Check(parser_doctype) &&
3162 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3163 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003164 (PyCFunction) _elementtree_XMLParser_doctype)) {
3165 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3166 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003167 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003168 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003169 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003170 res = PyObject_CallFunctionObjArgs(parser_doctype,
3171 doctype_name_obj, pubid_obj,
3172 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003173 Py_CLEAR(res);
3174 }
3175 }
3176
3177clear:
3178 Py_XDECREF(parser_doctype);
3179 Py_DECREF(doctype_name_obj);
3180 Py_DECREF(pubid_obj);
3181 Py_DECREF(sysid_obj);
3182}
3183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184static void
3185expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3186 const XML_Char* data_in)
3187{
3188 PyObject* target;
3189 PyObject* data;
3190 PyObject* res;
3191
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003192 if (PyErr_Occurred())
3193 return;
3194
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003195 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003196 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3197 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003199 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3200 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201 Py_XDECREF(res);
3202 Py_DECREF(data);
3203 Py_DECREF(target);
3204 } else {
3205 Py_XDECREF(data);
3206 Py_XDECREF(target);
3207 }
3208 }
3209}
3210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003211/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212
Eli Bendersky52467b12012-06-01 07:13:08 +03003213static PyObject *
3214xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215{
Eli Bendersky52467b12012-06-01 07:13:08 +03003216 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3217 if (self) {
3218 self->parser = NULL;
3219 self->target = self->entity = self->names = NULL;
3220 self->handle_start = self->handle_data = self->handle_end = NULL;
3221 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003222 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003224 return (PyObject *)self;
3225}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226
Serhiy Storchakacb985562015-05-04 15:32:48 +03003227/*[clinic input]
3228_elementtree.XMLParser.__init__
3229
3230 html: object = NULL
3231 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003232 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003233
3234[clinic start generated code]*/
3235
Eli Bendersky52467b12012-06-01 07:13:08 +03003236static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003237_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3238 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003239/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003240{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003241 self->entity = PyDict_New();
3242 if (!self->entity)
3243 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244
Serhiy Storchakacb985562015-05-04 15:32:48 +03003245 self->names = PyDict_New();
3246 if (!self->names) {
3247 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003250
Serhiy Storchakacb985562015-05-04 15:32:48 +03003251 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3252 if (!self->parser) {
3253 Py_CLEAR(self->entity);
3254 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003256 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 }
3258
Eli Bendersky52467b12012-06-01 07:13:08 +03003259 if (target) {
3260 Py_INCREF(target);
3261 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003262 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003264 Py_CLEAR(self->entity);
3265 Py_CLEAR(self->names);
3266 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003267 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003269 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271
Serhiy Storchakacb985562015-05-04 15:32:48 +03003272 self->handle_start = PyObject_GetAttrString(target, "start");
3273 self->handle_data = PyObject_GetAttrString(target, "data");
3274 self->handle_end = PyObject_GetAttrString(target, "end");
3275 self->handle_comment = PyObject_GetAttrString(target, "comment");
3276 self->handle_pi = PyObject_GetAttrString(target, "pi");
3277 self->handle_close = PyObject_GetAttrString(target, "close");
3278 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279
3280 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003283 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003285 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 (XML_StartElementHandler) expat_start_handler,
3287 (XML_EndElementHandler) expat_end_handler
3288 );
3289 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003290 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 (XML_DefaultHandler) expat_default_handler
3292 );
3293 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003294 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295 (XML_CharacterDataHandler) expat_data_handler
3296 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003299 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 (XML_CommentHandler) expat_comment_handler
3301 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 (XML_ProcessingInstructionHandler) expat_pi_handler
3306 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003307 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003308 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003309 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3310 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003312 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003313 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315
Eli Bendersky52467b12012-06-01 07:13:08 +03003316 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317}
3318
Eli Bendersky52467b12012-06-01 07:13:08 +03003319static int
3320xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3321{
3322 Py_VISIT(self->handle_close);
3323 Py_VISIT(self->handle_pi);
3324 Py_VISIT(self->handle_comment);
3325 Py_VISIT(self->handle_end);
3326 Py_VISIT(self->handle_data);
3327 Py_VISIT(self->handle_start);
3328
3329 Py_VISIT(self->target);
3330 Py_VISIT(self->entity);
3331 Py_VISIT(self->names);
3332
3333 return 0;
3334}
3335
3336static int
3337xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338{
3339 EXPAT(ParserFree)(self->parser);
3340
Antoine Pitrouc1948842012-10-01 23:40:37 +02003341 Py_CLEAR(self->handle_close);
3342 Py_CLEAR(self->handle_pi);
3343 Py_CLEAR(self->handle_comment);
3344 Py_CLEAR(self->handle_end);
3345 Py_CLEAR(self->handle_data);
3346 Py_CLEAR(self->handle_start);
3347 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348
Antoine Pitrouc1948842012-10-01 23:40:37 +02003349 Py_CLEAR(self->target);
3350 Py_CLEAR(self->entity);
3351 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352
Eli Bendersky52467b12012-06-01 07:13:08 +03003353 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354}
3355
Eli Bendersky52467b12012-06-01 07:13:08 +03003356static void
3357xmlparser_dealloc(XMLParserObject* self)
3358{
3359 PyObject_GC_UnTrack(self);
3360 xmlparser_gc_clear(self);
3361 Py_TYPE(self)->tp_free((PyObject *)self);
3362}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003363
3364LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003365expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366{
3367 int ok;
3368
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003369 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3371
3372 if (PyErr_Occurred())
3373 return NULL;
3374
3375 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003376 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003377 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003379 EXPAT(GetErrorColumnNumber)(self->parser),
3380 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003381 );
3382 return NULL;
3383 }
3384
3385 Py_RETURN_NONE;
3386}
3387
Serhiy Storchakacb985562015-05-04 15:32:48 +03003388/*[clinic input]
3389_elementtree.XMLParser.close
3390
3391[clinic start generated code]*/
3392
3393static PyObject *
3394_elementtree_XMLParser_close_impl(XMLParserObject *self)
3395/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003396{
3397 /* end feeding data to parser */
3398
3399 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003401 if (!res)
3402 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003404 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405 Py_DECREF(res);
3406 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003407 }
3408 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003409 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003410 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003411 }
3412 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003413 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003414 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003415}
3416
Serhiy Storchakacb985562015-05-04 15:32:48 +03003417/*[clinic input]
3418_elementtree.XMLParser.feed
3419
3420 data: object
3421 /
3422
3423[clinic start generated code]*/
3424
3425static PyObject *
3426_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3427/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003428{
3429 /* feed data to parser */
3430
Serhiy Storchakacb985562015-05-04 15:32:48 +03003431 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003432 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003433 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3434 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003435 return NULL;
3436 if (data_len > INT_MAX) {
3437 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3438 return NULL;
3439 }
3440 /* Explicitly set UTF-8 encoding. Return code ignored. */
3441 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003442 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003443 }
3444 else {
3445 Py_buffer view;
3446 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003447 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003448 return NULL;
3449 if (view.len > INT_MAX) {
3450 PyBuffer_Release(&view);
3451 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3452 return NULL;
3453 }
3454 res = expat_parse(self, view.buf, (int)view.len, 0);
3455 PyBuffer_Release(&view);
3456 return res;
3457 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003458}
3459
Serhiy Storchakacb985562015-05-04 15:32:48 +03003460/*[clinic input]
3461_elementtree.XMLParser._parse_whole
3462
3463 file: object
3464 /
3465
3466[clinic start generated code]*/
3467
3468static PyObject *
3469_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3470/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471{
Eli Benderskya3699232013-05-19 18:47:23 -07003472 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 PyObject* reader;
3474 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003475 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 PyObject* res;
3477
Serhiy Storchakacb985562015-05-04 15:32:48 +03003478 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 if (!reader)
3480 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003481
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 /* read from open file object */
3483 for (;;) {
3484
3485 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3486
3487 if (!buffer) {
3488 /* read failed (e.g. due to KeyboardInterrupt) */
3489 Py_DECREF(reader);
3490 return NULL;
3491 }
3492
Eli Benderskyf996e772012-03-16 05:53:30 +02003493 if (PyUnicode_CheckExact(buffer)) {
3494 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003495 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003496 Py_DECREF(buffer);
3497 break;
3498 }
3499 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003500 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003501 if (!temp) {
3502 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003503 Py_DECREF(reader);
3504 return NULL;
3505 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003506 buffer = temp;
3507 }
3508 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003509 Py_DECREF(buffer);
3510 break;
3511 }
3512
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003513 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3514 Py_DECREF(buffer);
3515 Py_DECREF(reader);
3516 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3517 return NULL;
3518 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003519 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003520 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521 );
3522
3523 Py_DECREF(buffer);
3524
3525 if (!res) {
3526 Py_DECREF(reader);
3527 return NULL;
3528 }
3529 Py_DECREF(res);
3530
3531 }
3532
3533 Py_DECREF(reader);
3534
3535 res = expat_parse(self, "", 0, 1);
3536
3537 if (res && TreeBuilder_CheckExact(self->target)) {
3538 Py_DECREF(res);
3539 return treebuilder_done((TreeBuilderObject*) self->target);
3540 }
3541
3542 return res;
3543}
3544
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545/*[clinic input]
3546_elementtree.XMLParser.doctype
3547
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003548 name: object
3549 pubid: object
3550 system: object
3551 /
3552
Serhiy Storchakacb985562015-05-04 15:32:48 +03003553[clinic start generated code]*/
3554
3555static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003556_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3557 PyObject *pubid, PyObject *system)
3558/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003559{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003560 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3561 "This method of XMLParser is deprecated. Define"
3562 " doctype() method on the TreeBuilder target.",
3563 1) < 0) {
3564 return NULL;
3565 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003566 Py_RETURN_NONE;
3567}
3568
Serhiy Storchakacb985562015-05-04 15:32:48 +03003569/*[clinic input]
3570_elementtree.XMLParser._setevents
3571
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003572 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003573 events_to_report: object = None
3574 /
3575
3576[clinic start generated code]*/
3577
3578static PyObject *
3579_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3580 PyObject *events_queue,
3581 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003582/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583{
3584 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003585 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003586 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003587 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588
3589 if (!TreeBuilder_CheckExact(self->target)) {
3590 PyErr_SetString(
3591 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003592 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593 "targets"
3594 );
3595 return NULL;
3596 }
3597
3598 target = (TreeBuilderObject*) self->target;
3599
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003600 events_append = PyObject_GetAttrString(events_queue, "append");
3601 if (events_append == NULL)
3602 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003603 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
3605 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003606 Py_CLEAR(target->start_event_obj);
3607 Py_CLEAR(target->end_event_obj);
3608 Py_CLEAR(target->start_ns_event_obj);
3609 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003611 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003612 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003613 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 Py_RETURN_NONE;
3615 }
3616
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003617 if (!(events_seq = PySequence_Fast(events_to_report,
3618 "events must be a sequence"))) {
3619 return NULL;
3620 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003622 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003623 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003624 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003625 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003626 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003627 } else if (PyBytes_Check(event_name_obj)) {
3628 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003629 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003630 if (event_name == NULL) {
3631 Py_DECREF(events_seq);
3632 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3633 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003634 }
3635
3636 Py_INCREF(event_name_obj);
3637 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003638 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003639 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003640 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003641 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003642 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003643 EXPAT(SetNamespaceDeclHandler)(
3644 self->parser,
3645 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3646 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3647 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003648 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003649 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003650 EXPAT(SetNamespaceDeclHandler)(
3651 self->parser,
3652 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3653 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3654 );
3655 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003656 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003657 Py_DECREF(events_seq);
3658 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659 return NULL;
3660 }
3661 }
3662
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665}
3666
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003667static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003668xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003670 if (PyUnicode_Check(nameobj)) {
3671 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003672 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003673 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003674 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003675 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003676 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003677 return PyUnicode_FromFormat(
3678 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003680 }
3681 else
3682 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683
Alexander Belopolskye239d232010-12-08 23:31:48 +00003684 Py_INCREF(res);
3685 return res;
3686 }
3687 generic:
3688 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003689}
3690
Serhiy Storchakacb985562015-05-04 15:32:48 +03003691#include "clinic/_elementtree.c.h"
3692
/* Method table for the Element type (installed as Element_Type.tp_methods).
   Apart from the explicit "getiterator" entry, every entry is a
   *_METHODDEF macro.
   NOTE(review): the macros presumably come from the Argument Clinic header
   clinic/_elementtree.c.h included just above -- confirm. */
static PyMethodDef element_methods[] = {

    _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF

    /* attribute access */
    _ELEMENTTREE_ELEMENT_GET_METHODDEF
    _ELEMENTTREE_ELEMENT_SET_METHODDEF

    /* searching */
    _ELEMENTTREE_ELEMENT_FIND_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF

    /* child manipulation */
    _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
    _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
    _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
    _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF

    /* iteration */
    _ELEMENTTREE_ELEMENT_ITER_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF

    /* "getiterator" is bound to the same C function and docstring as
       _elementtree_Element_iter, i.e. an alias under a second name */
    {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_FASTCALL, _elementtree_Element_iter__doc__},
    _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF

    _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
    _ELEMENTTREE_ELEMENT_KEYS_METHODDEF

    _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF

    /* copy, sizeof and pickle protocol hooks */
    _ELEMENTTREE_ELEMENT___COPY___METHODDEF
    _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
    _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
    _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
    _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF

    {NULL, NULL}  /* sentinel */
};
3729
/* Mapping protocol slots for Element (installed as
   Element_Type.tp_as_mapping).  The initializer is positional, so the
   order below must stay length, subscript, subscript assignment. */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length,               /* mp_length */
    (binaryfunc) element_subscr,            /* mp_subscript */
    (objobjargproc) element_ass_subscr,     /* mp_ass_subscript */
};
3735
/* Computed attributes of Element (installed as Element_Type.tp_getset).
   Each entry is {name, getter, setter, docstring}; the table ends with a
   NULL-name sentinel. */
static PyGetSetDef element_getsetlist[] = {
    {"tag",
        (getter)element_tag_getter,
        (setter)element_tag_setter,
        "A string identifying what kind of data this element represents"},
    {"text",
        (getter)element_text_getter,
        (setter)element_text_setter,
        "A string of text directly after the start tag, or None"},
    {"tail",
        (getter)element_tail_getter,
        (setter)element_tail_setter,
        "A string of text directly after the end tag, or None"},
    {"attrib",
        (getter)element_attrib_getter,
        (setter)element_attrib_setter,
        "A dictionary containing the element's attributes"},
    {NULL},  /* sentinel */
};
3755
/* Type object for xml.etree.ElementTree.Element.
   The flags make it subclassable (Py_TPFLAGS_BASETYPE) and GC-tracked
   (Py_TPFLAGS_HAVE_GC); weak references are supported through the
   weakreflist field of ElementObject.  The initializer is positional:
   every slot, including the zero ones, must stay in this exact order. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc,                    /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    (reprfunc)element_repr,                         /* tp_repr */
    0,                                              /* tp_as_number */
    &element_as_sequence,                           /* tp_as_sequence */
    &element_as_mapping,                            /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    PyObject_GenericGetAttr,                        /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)element_gc_traverse,              /* tp_traverse */
    (inquiry)element_gc_clear,                      /* tp_clear */
    0,                                              /* tp_richcompare */
    offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    element_methods,                                /* tp_methods */
    0,                                              /* tp_members */
    element_getsetlist,                             /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)element_init,                         /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    element_new,                                    /* tp_new */
    0,                                              /* tp_free */
};
3797
/* Method table for the TreeBuilder type (installed as
   TreeBuilder_Type.tp_methods): data, start, end and close. */
static PyMethodDef treebuilder_methods[] = {
    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3805
/* Type object for xml.etree.ElementTree.TreeBuilder.
   Subclassable and GC-tracked (see tp_flags); attribute access uses the
   inherited default since tp_getattro is left at 0.  The initializer is
   positional: every slot must stay in this exact order. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    _elementtree_TreeBuilder___init__,              /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
3847
/* Method table for the XMLParser type (installed as
   XMLParser_Type.tp_methods): feed, close, _parse_whole, _setevents and
   the deprecated doctype. */
static PyMethodDef xmlparser_methods[] = {
    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
    _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3856
/* Type object for xml.etree.ElementTree.XMLParser.
   Subclassable and GC-tracked (see tp_flags).  Attribute lookup goes
   through xmlparser_getattro, and xmlparser_gc_clear serves as tp_clear
   in addition to being called from xmlparser_dealloc.  The initializer is
   positional: every slot must stay in this exact order. */
static PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc,                  /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)xmlparser_getattro,               /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
    (inquiry)xmlparser_gc_clear,                    /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    xmlparser_methods,                              /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    _elementtree_XMLParser___init__,                /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    xmlparser_new,                                  /* tp_new */
    0,                                              /* tp_free */
};
3898
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003899/* ==================================================================== */
3900/* python module interface */
3901
/* Module-level functions of _elementtree (installed through
   elementtreemodule).  Only SubElement, which accepts positional and
   keyword arguments; it has no docstring entry. */
static PyMethodDef _functions[] = {
    {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
    {NULL, NULL}  /* sentinel */
};
3906
Martin v. Löwis1a214512008-06-11 05:26:20 +00003907
/* Module definition for _elementtree.  Per-module state of type
   elementtreestate is allocated by the interpreter (m_size) and handed to
   the traverse/clear/free hooks below.  Positional initializer: keep the
   field order. */
static struct PyModuleDef elementtreemodule = {
    PyModuleDef_HEAD_INIT,
    "_elementtree",             /* m_name */
    NULL,                       /* m_doc */
    sizeof(elementtreestate),   /* m_size */
    _functions,                 /* m_methods */
    NULL,                       /* m_slots (named m_reload before 3.5) */
    elementtree_traverse,       /* m_traverse */
    elementtree_clear,          /* m_clear */
    elementtree_free            /* m_free */
};
3919
Neal Norwitzf6657e62006-12-28 04:47:50 +00003920PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003921PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003922{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003923 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003924 elementtreestate *st;
3925
3926 m = PyState_FindModule(&elementtreemodule);
3927 if (m) {
3928 Py_INCREF(m);
3929 return m;
3930 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003931
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003932 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003933 if (PyType_Ready(&ElementIter_Type) < 0)
3934 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003935 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003936 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003937 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003938 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003939 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003940 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003941
Eli Bendersky532d03e2013-08-10 08:00:39 -07003942 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003943 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003944 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003945 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003946
Eli Bendersky828efde2012-04-05 05:40:58 +03003947 if (!(temp = PyImport_ImportModule("copy")))
3948 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003949 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003950 Py_XDECREF(temp);
3951
Eli Bendersky532d03e2013-08-10 08:00:39 -07003952 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003953 return NULL;
3954
Eli Bendersky20d41742012-06-01 09:48:37 +03003955 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003956 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3957 if (expat_capi) {
3958 /* check that it's usable */
3959 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003960 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003961 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3962 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003963 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003964 PyErr_SetString(PyExc_ImportError,
3965 "pyexpat version is incompatible");
3966 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003967 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003968 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003969 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003970 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003971
Eli Bendersky532d03e2013-08-10 08:00:39 -07003972 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003973 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003974 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003975 Py_INCREF(st->parseerror_obj);
3976 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003977
Eli Bendersky092af1f2012-03-04 07:14:03 +02003978 Py_INCREF((PyObject *)&Element_Type);
3979 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3980
Eli Bendersky58d548d2012-05-29 15:45:16 +03003981 Py_INCREF((PyObject *)&TreeBuilder_Type);
3982 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3983
Eli Bendersky52467b12012-06-01 07:13:08 +03003984 Py_INCREF((PyObject *)&XMLParser_Type);
3985 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003986
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003987 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003988}