blob: 2d623dc262089fb9c50d689d7c309309161ec68e [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  When the first branch is enabled, ALLOC
   and RELEASE keep a running byte count in `memory` and print it together
   with `comment`; in the default build both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-private helper returning
   `type`.  Under MSVC the helper is additionally marked __inline and
   __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* Macros used to store a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as object pointers must go through
   JOIN_OBJ; see the comments in the ElementObject definition for more
   info.

   JOIN_OBJ(p)       - the pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the pointer with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +0200153 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
Eli Bendersky48d358b2012-05-30 17:57:50 +0300154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True iff `op` is exactly an Element instance (subclasses do not match). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300400 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
511 PyObject* res = self->text;
512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
516 res = list_join(res);
517 if (!res)
518 return NULL;
519 self->text = res;
520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
531 PyObject* res = self->tail;
532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
536 res = list_join(res);
537 if (!res)
538 return NULL;
539 self->tail = res;
540 }
541 }
542
543 return res;
544}
545
546static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300547subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548{
549 PyObject* elem;
550
551 ElementObject* parent;
552 PyObject* tag;
553 PyObject* attrib = NULL;
554 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
555 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800556 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (attrib) {
561 /* attrib passed as positional arg */
562 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 if (!attrib)
564 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300565 if (kwds) {
566 if (PyDict_Update(attrib, kwds) < 0) {
567 return NULL;
568 }
569 }
570 } else if (kwds) {
571 /* have keyword args */
572 attrib = get_attrib_from_keywords(kwds);
573 if (!attrib)
574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300576 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 Py_INCREF(Py_None);
578 attrib = Py_None;
579 }
580
Eli Bendersky092af1f2012-03-04 07:14:03 +0200581 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200583 if (elem == NULL)
584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000586 if (element_add_subelement(parent, elem) < 0) {
587 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590
591 return elem;
592}
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594static int
595element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
596{
597 Py_VISIT(self->tag);
598 Py_VISIT(JOIN_OBJ(self->text));
599 Py_VISIT(JOIN_OBJ(self->tail));
600
601 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200602 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300603 Py_VISIT(self->extra->attrib);
604
605 for (i = 0; i < self->extra->length; ++i)
606 Py_VISIT(self->extra->children[i]);
607 }
608 return 0;
609}
610
611static int
612element_gc_clear(ElementObject *self)
613{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700615 _clear_joined_ptr(&self->text);
616 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300617
618 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300619 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 return 0;
623}
624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625static void
626element_dealloc(ElementObject* self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200629 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300630
631 if (self->weakreflist != NULL)
632 PyObject_ClearWeakRefs((PyObject *) self);
633
Eli Bendersky0192ba32012-03-30 16:38:33 +0300634 /* element_gc_clear clears all references and deallocates extra
635 */
636 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000637
638 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200639 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200640 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000641}
642
643/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
Serhiy Storchakacb985562015-05-04 15:32:48 +0300645/*[clinic input]
646_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647
Serhiy Storchakacb985562015-05-04 15:32:48 +0300648 subelement: object(subclass_of='&Element_Type')
649 /
650
651[clinic start generated code]*/
652
653static PyObject *
654_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
655/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
656{
657 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658 return NULL;
659
660 Py_RETURN_NONE;
661}
662
Serhiy Storchakacb985562015-05-04 15:32:48 +0300663/*[clinic input]
664_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666[clinic start generated code]*/
667
668static PyObject *
669_elementtree_Element_clear_impl(ElementObject *self)
670/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
671{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300672 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
674 Py_INCREF(Py_None);
675 Py_DECREF(JOIN_OBJ(self->text));
676 self->text = Py_None;
677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->tail));
680 self->tail = Py_None;
681
682 Py_RETURN_NONE;
683}
684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685/*[clinic input]
686_elementtree.Element.__copy__
687
688[clinic start generated code]*/
689
690static PyObject *
691_elementtree_Element___copy___impl(ElementObject *self)
692/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200694 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695 ElementObject* element;
696
Eli Bendersky092af1f2012-03-04 07:14:03 +0200697 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800698 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699 if (!element)
700 return NULL;
701
702 Py_DECREF(JOIN_OBJ(element->text));
703 element->text = self->text;
704 Py_INCREF(JOIN_OBJ(element->text));
705
706 Py_DECREF(JOIN_OBJ(element->tail));
707 element->tail = self->tail;
708 Py_INCREF(JOIN_OBJ(element->tail));
709
710 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000711 if (element_resize(element, self->extra->length) < 0) {
712 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 for (i = 0; i < self->extra->length; i++) {
717 Py_INCREF(self->extra->children[i]);
718 element->extra->children[i] = self->extra->children[i];
719 }
720
721 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722 }
723
724 return (PyObject*) element;
725}
726
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200727/* Helper for a deep copy. */
728LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
729
Serhiy Storchakacb985562015-05-04 15:32:48 +0300730/*[clinic input]
731_elementtree.Element.__deepcopy__
732
733 memo: object
734 /
735
736[clinic start generated code]*/
737
738static PyObject *
739_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
740/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 ElementObject* element;
744 PyObject* tag;
745 PyObject* attrib;
746 PyObject* text;
747 PyObject* tail;
748 PyObject* id;
749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 tag = deepcopy(self->tag, memo);
751 if (!tag)
752 return NULL;
753
754 if (self->extra) {
755 attrib = deepcopy(self->extra->attrib, memo);
756 if (!attrib) {
757 Py_DECREF(tag);
758 return NULL;
759 }
760 } else {
761 Py_INCREF(Py_None);
762 attrib = Py_None;
763 }
764
Eli Bendersky092af1f2012-03-04 07:14:03 +0200765 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766
767 Py_DECREF(tag);
768 Py_DECREF(attrib);
769
770 if (!element)
771 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 text = deepcopy(JOIN_OBJ(self->text), memo);
774 if (!text)
775 goto error;
776 Py_DECREF(element->text);
777 element->text = JOIN_SET(text, JOIN_GET(self->text));
778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
782 Py_DECREF(element->tail);
783 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
784
785 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000786 if (element_resize(element, self->extra->length) < 0)
787 goto error;
788
789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* child = deepcopy(self->extra->children[i], memo);
791 if (!child) {
792 element->extra->length = i;
793 goto error;
794 }
795 element->extra->children[i] = child;
796 }
797
798 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700802 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200820LOCAL(PyObject *)
821deepcopy(PyObject *object, PyObject *memo)
822{
823 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200824 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200825 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200826
827 /* Fast paths */
828 if (object == Py_None || PyUnicode_CheckExact(object)) {
829 Py_INCREF(object);
830 return object;
831 }
832
833 if (Py_REFCNT(object) == 1) {
834 if (PyDict_CheckExact(object)) {
835 PyObject *key, *value;
836 Py_ssize_t pos = 0;
837 int simple = 1;
838 while (PyDict_Next(object, &pos, &key, &value)) {
839 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
840 simple = 0;
841 break;
842 }
843 }
844 if (simple)
845 return PyDict_Copy(object);
846 /* Fall through to general case */
847 }
848 else if (Element_CheckExact(object)) {
849 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
850 }
851 }
852
853 /* General case */
854 st = ET_STATE_GLOBAL;
855 if (!st->deepcopy_obj) {
856 PyErr_SetString(PyExc_RuntimeError,
857 "deepcopy helper not found");
858 return NULL;
859 }
860
Victor Stinner7fbac452016-08-20 01:34:44 +0200861 stack[0] = object;
862 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200863 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200864}
865
866
Serhiy Storchakacb985562015-05-04 15:32:48 +0300867/*[clinic input]
868_elementtree.Element.__sizeof__ -> Py_ssize_t
869
870[clinic start generated code]*/
871
872static Py_ssize_t
873_elementtree_Element___sizeof___impl(ElementObject *self)
874/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200875{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200876 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877 if (self->extra) {
878 result += sizeof(ElementObjectExtra);
879 if (self->extra->children != self->extra->_children)
880 result += sizeof(PyObject*) * self->extra->allocated;
881 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300882 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200883}
884
/* Keys of the state dict exchanged by __getstate__ / __setstate__. */
#define PICKLED_TAG         "tag"
#define PICKLED_CHILDREN    "_children"
#define PICKLED_ATTRIB      "attrib"
#define PICKLED_TAIL        "tail"
#define PICKLED_TEXT        "text"
891
892/* __getstate__ returns a fabricated instance dict as in the pure-Python
893 * Element implementation, for interoperability/interchangeability. This
894 * makes the pure-Python implementation details an API, but (a) there aren't
895 * any unnecessary structures there; and (b) it buys compatibility with 3.2
896 * pickles. See issue #16076.
897 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300898/*[clinic input]
899_elementtree.Element.__getstate__
900
901[clinic start generated code]*/
902
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300904_elementtree_Element___getstate___impl(ElementObject *self)
905/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200907 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908 PyObject *instancedict = NULL, *children;
909
910 /* Build a list of children. */
911 children = PyList_New(self->extra ? self->extra->length : 0);
912 if (!children)
913 return NULL;
914 for (i = 0; i < PyList_GET_SIZE(children); i++) {
915 PyObject *child = self->extra->children[i];
916 Py_INCREF(child);
917 PyList_SET_ITEM(children, i, child);
918 }
919
920 /* Construct the state object. */
921 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
922 if (noattrib)
923 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
924 PICKLED_TAG, self->tag,
925 PICKLED_CHILDREN, children,
926 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700927 PICKLED_TEXT, JOIN_OBJ(self->text),
928 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800929 else
930 instancedict = Py_BuildValue("{sOsOsOsOsO}",
931 PICKLED_TAG, self->tag,
932 PICKLED_CHILDREN, children,
933 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700934 PICKLED_TEXT, JOIN_OBJ(self->text),
935 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800936 if (instancedict) {
937 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800939 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 else {
941 for (i = 0; i < PyList_GET_SIZE(children); i++)
942 Py_DECREF(PyList_GET_ITEM(children, i));
943 Py_DECREF(children);
944
945 return NULL;
946 }
947}
948
949static PyObject *
950element_setstate_from_attributes(ElementObject *self,
951 PyObject *tag,
952 PyObject *attrib,
953 PyObject *text,
954 PyObject *tail,
955 PyObject *children)
956{
957 Py_ssize_t i, nchildren;
958
959 if (!tag) {
960 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
961 return NULL;
962 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200964 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300965 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966
Eli Benderskydd3661e2013-09-13 06:24:25 -0700967 _clear_joined_ptr(&self->text);
968 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
969 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Eli Benderskydd3661e2013-09-13 06:24:25 -0700971 _clear_joined_ptr(&self->tail);
972 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
973 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974
975 /* Handle ATTRIB and CHILDREN. */
976 if (!children && !attrib)
977 Py_RETURN_NONE;
978
979 /* Compute 'nchildren'. */
980 if (children) {
981 if (!PyList_Check(children)) {
982 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
983 return NULL;
984 }
985 nchildren = PyList_Size(children);
986 }
987 else {
988 nchildren = 0;
989 }
990
991 /* Allocate 'extra'. */
992 if (element_resize(self, nchildren)) {
993 return NULL;
994 }
995 assert(self->extra && self->extra->allocated >= nchildren);
996
997 /* Copy children */
998 for (i = 0; i < nchildren; i++) {
999 self->extra->children[i] = PyList_GET_ITEM(children, i);
1000 Py_INCREF(self->extra->children[i]);
1001 }
1002
1003 self->extra->length = nchildren;
1004 self->extra->allocated = nchildren;
1005
1006 /* Stash attrib. */
1007 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001009 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 }
1011
1012 Py_RETURN_NONE;
1013}
1014
1015/* __setstate__ for Element instance from the Python implementation.
1016 * 'state' should be the instance dict.
1017 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001018
Eli Bendersky698bdb22013-01-10 06:01:06 -08001019static PyObject *
1020element_setstate_from_Python(ElementObject *self, PyObject *state)
1021{
1022 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1023 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1024 PyObject *args;
1025 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001026 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028 tag = attrib = text = tail = children = NULL;
1029 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032
1033 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1034 &attrib, &text, &tail, &children))
1035 retval = element_setstate_from_attributes(self, tag, attrib, text,
1036 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001037 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001038 retval = NULL;
1039
1040 Py_DECREF(args);
1041 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001042}
1043
Serhiy Storchakacb985562015-05-04 15:32:48 +03001044/*[clinic input]
1045_elementtree.Element.__setstate__
1046
1047 state: object
1048 /
1049
1050[clinic start generated code]*/
1051
Eli Bendersky698bdb22013-01-10 06:01:06 -08001052static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001053_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1054/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001055{
1056 if (!PyDict_CheckExact(state)) {
1057 PyErr_Format(PyExc_TypeError,
1058 "Don't know how to unpickle \"%.200R\" as an Element",
1059 state);
1060 return NULL;
1061 }
1062 else
1063 return element_setstate_from_Python(self, state);
1064}
1065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066LOCAL(int)
1067checkpath(PyObject* tag)
1068{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001069 Py_ssize_t i;
1070 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071
1072 /* check if a tag contains an xpath character */
1073
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001074#define PATHCHAR(ch) \
1075 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001076
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1079 void *data = PyUnicode_DATA(tag);
1080 unsigned int kind = PyUnicode_KIND(tag);
1081 for (i = 0; i < len; i++) {
1082 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1083 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 return 1;
1089 }
1090 return 0;
1091 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001092 if (PyBytes_Check(tag)) {
1093 char *p = PyBytes_AS_STRING(tag);
1094 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001095 if (p[i] == '{')
1096 check = 0;
1097 else if (p[i] == '}')
1098 check = 1;
1099 else if (check && PATHCHAR(p[i]))
1100 return 1;
1101 }
1102 return 0;
1103 }
1104
1105 return 1; /* unknown type; might be path expression */
1106}
1107
Serhiy Storchakacb985562015-05-04 15:32:48 +03001108/*[clinic input]
1109_elementtree.Element.extend
1110
1111 elements: object
1112 /
1113
1114[clinic start generated code]*/
1115
1116static PyObject *
1117_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1118/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119{
1120 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001121 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122
Serhiy Storchakacb985562015-05-04 15:32:48 +03001123 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 if (!seq) {
1125 PyErr_Format(
1126 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001127 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 );
1129 return NULL;
1130 }
1131
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001132 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001133 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 Py_INCREF(element);
1135 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001136 PyErr_Format(
1137 PyExc_TypeError,
1138 "expected an Element, not \"%.200s\"",
1139 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001140 Py_DECREF(seq);
1141 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001142 return NULL;
1143 }
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145 if (element_add_subelement(self, element) < 0) {
1146 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001147 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 return NULL;
1149 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001150 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 }
1152
1153 Py_DECREF(seq);
1154
1155 Py_RETURN_NONE;
1156}
1157
Serhiy Storchakacb985562015-05-04 15:32:48 +03001158/*[clinic input]
1159_elementtree.Element.find
1160
1161 path: object
1162 namespaces: object = None
1163
1164[clinic start generated code]*/
1165
1166static PyObject *
1167_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1168 PyObject *namespaces)
1169/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001171 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001172 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001173
Serhiy Storchakacb985562015-05-04 15:32:48 +03001174 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001175 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001176 return _PyObject_CallMethodIdObjArgs(
1177 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001179 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180
1181 if (!self->extra)
1182 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184 for (i = 0; i < self->extra->length; i++) {
1185 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001186 int rc;
1187 if (!Element_CheckExact(item))
1188 continue;
1189 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001190 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001192 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 Py_DECREF(item);
1194 if (rc < 0)
1195 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 }
1197
1198 Py_RETURN_NONE;
1199}
1200
Serhiy Storchakacb985562015-05-04 15:32:48 +03001201/*[clinic input]
1202_elementtree.Element.findtext
1203
1204 path: object
1205 default: object = None
1206 namespaces: object = None
1207
1208[clinic start generated code]*/
1209
1210static PyObject *
1211_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1212 PyObject *default_value,
1213 PyObject *namespaces)
1214/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001216 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001217 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001218 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001219
Serhiy Storchakacb985562015-05-04 15:32:48 +03001220 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001221 return _PyObject_CallMethodIdObjArgs(
1222 st->elementpath_obj, &PyId_findtext,
1223 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 );
1225
1226 if (!self->extra) {
1227 Py_INCREF(default_value);
1228 return default_value;
1229 }
1230
1231 for (i = 0; i < self->extra->length; i++) {
1232 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001233 int rc;
1234 if (!Element_CheckExact(item))
1235 continue;
1236 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001237 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001238 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001239 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 if (text == Py_None) {
1241 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001242 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001244 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 return text;
1247 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001248 Py_DECREF(item);
1249 if (rc < 0)
1250 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251 }
1252
1253 Py_INCREF(default_value);
1254 return default_value;
1255}
1256
Serhiy Storchakacb985562015-05-04 15:32:48 +03001257/*[clinic input]
1258_elementtree.Element.findall
1259
1260 path: object
1261 namespaces: object = None
1262
1263[clinic start generated code]*/
1264
1265static PyObject *
1266_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1267 PyObject *namespaces)
1268/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001270 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001271 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001272 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001273 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001274
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001276 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001277 return _PyObject_CallMethodIdObjArgs(
1278 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001280 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001281
1282 out = PyList_New(0);
1283 if (!out)
1284 return NULL;
1285
1286 if (!self->extra)
1287 return out;
1288
1289 for (i = 0; i < self->extra->length; i++) {
1290 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001291 int rc;
1292 if (!Element_CheckExact(item))
1293 continue;
1294 Py_INCREF(item);
1295 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1296 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1297 Py_DECREF(item);
1298 Py_DECREF(out);
1299 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001301 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001302 }
1303
1304 return out;
1305}
1306
Serhiy Storchakacb985562015-05-04 15:32:48 +03001307/*[clinic input]
1308_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001309
Serhiy Storchakacb985562015-05-04 15:32:48 +03001310 path: object
1311 namespaces: object = None
1312
1313[clinic start generated code]*/
1314
1315static PyObject *
1316_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1317 PyObject *namespaces)
1318/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1319{
1320 PyObject* tag = path;
1321 _Py_IDENTIFIER(iterfind);
1322 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323
Victor Stinnerf5616342016-12-09 15:26:00 +01001324 return _PyObject_CallMethodIdObjArgs(
1325 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326}
1327
Serhiy Storchakacb985562015-05-04 15:32:48 +03001328/*[clinic input]
1329_elementtree.Element.get
1330
1331 key: object
1332 default: object = None
1333
1334[clinic start generated code]*/
1335
1336static PyObject *
1337_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1338 PyObject *default_value)
1339/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340{
1341 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001342
1343 if (!self->extra || self->extra->attrib == Py_None)
1344 value = default_value;
1345 else {
1346 value = PyDict_GetItem(self->extra->attrib, key);
1347 if (!value)
1348 value = default_value;
1349 }
1350
1351 Py_INCREF(value);
1352 return value;
1353}
1354
Serhiy Storchakacb985562015-05-04 15:32:48 +03001355/*[clinic input]
1356_elementtree.Element.getchildren
1357
1358[clinic start generated code]*/
1359
1360static PyObject *
1361_elementtree_Element_getchildren_impl(ElementObject *self)
1362/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001363{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001364 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001365 PyObject* list;
1366
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001367 /* FIXME: report as deprecated? */
1368
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001369 if (!self->extra)
1370 return PyList_New(0);
1371
1372 list = PyList_New(self->extra->length);
1373 if (!list)
1374 return NULL;
1375
1376 for (i = 0; i < self->extra->length; i++) {
1377 PyObject* item = self->extra->children[i];
1378 Py_INCREF(item);
1379 PyList_SET_ITEM(list, i, item);
1380 }
1381
1382 return list;
1383}
1384
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001385
Eli Bendersky64d11e62012-06-15 07:42:50 +03001386static PyObject *
1387create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1388
1389
Serhiy Storchakacb985562015-05-04 15:32:48 +03001390/*[clinic input]
1391_elementtree.Element.iter
1392
1393 tag: object = None
1394
1395[clinic start generated code]*/
1396
Eli Bendersky64d11e62012-06-15 07:42:50 +03001397static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001398_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1399/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001400{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001401 if (PyUnicode_Check(tag)) {
1402 if (PyUnicode_READY(tag) < 0)
1403 return NULL;
1404 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1405 tag = Py_None;
1406 }
1407 else if (PyBytes_Check(tag)) {
1408 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1409 tag = Py_None;
1410 }
1411
Eli Bendersky64d11e62012-06-15 07:42:50 +03001412 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001413}
1414
1415
Serhiy Storchakacb985562015-05-04 15:32:48 +03001416/*[clinic input]
1417_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001418
Serhiy Storchakacb985562015-05-04 15:32:48 +03001419[clinic start generated code]*/
1420
1421static PyObject *
1422_elementtree_Element_itertext_impl(ElementObject *self)
1423/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1424{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001425 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001426}
1427
Eli Bendersky64d11e62012-06-15 07:42:50 +03001428
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001430element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001432 ElementObject* self = (ElementObject*) self_;
1433
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434 if (!self->extra || index < 0 || index >= self->extra->length) {
1435 PyErr_SetString(
1436 PyExc_IndexError,
1437 "child index out of range"
1438 );
1439 return NULL;
1440 }
1441
1442 Py_INCREF(self->extra->children[index]);
1443 return self->extra->children[index];
1444}
1445
Serhiy Storchakacb985562015-05-04 15:32:48 +03001446/*[clinic input]
1447_elementtree.Element.insert
1448
1449 index: Py_ssize_t
1450 subelement: object(subclass_of='&Element_Type')
1451 /
1452
1453[clinic start generated code]*/
1454
1455static PyObject *
1456_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1457 PyObject *subelement)
1458/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001460 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001461
Victor Stinner5f0af232013-07-11 23:01:36 +02001462 if (!self->extra) {
1463 if (create_extra(self, NULL) < 0)
1464 return NULL;
1465 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001467 if (index < 0) {
1468 index += self->extra->length;
1469 if (index < 0)
1470 index = 0;
1471 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001472 if (index > self->extra->length)
1473 index = self->extra->length;
1474
1475 if (element_resize(self, 1) < 0)
1476 return NULL;
1477
1478 for (i = self->extra->length; i > index; i--)
1479 self->extra->children[i] = self->extra->children[i-1];
1480
Serhiy Storchakacb985562015-05-04 15:32:48 +03001481 Py_INCREF(subelement);
1482 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001483
1484 self->extra->length++;
1485
1486 Py_RETURN_NONE;
1487}
1488
Serhiy Storchakacb985562015-05-04 15:32:48 +03001489/*[clinic input]
1490_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001491
Serhiy Storchakacb985562015-05-04 15:32:48 +03001492[clinic start generated code]*/
1493
1494static PyObject *
1495_elementtree_Element_items_impl(ElementObject *self)
1496/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1497{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001498 if (!self->extra || self->extra->attrib == Py_None)
1499 return PyList_New(0);
1500
1501 return PyDict_Items(self->extra->attrib);
1502}
1503
Serhiy Storchakacb985562015-05-04 15:32:48 +03001504/*[clinic input]
1505_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001506
Serhiy Storchakacb985562015-05-04 15:32:48 +03001507[clinic start generated code]*/
1508
1509static PyObject *
1510_elementtree_Element_keys_impl(ElementObject *self)
1511/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1512{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513 if (!self->extra || self->extra->attrib == Py_None)
1514 return PyList_New(0);
1515
1516 return PyDict_Keys(self->extra->attrib);
1517}
1518
Martin v. Löwis18e16552006-02-15 17:27:45 +00001519static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001520element_length(ElementObject* self)
1521{
1522 if (!self->extra)
1523 return 0;
1524
1525 return self->extra->length;
1526}
1527
Serhiy Storchakacb985562015-05-04 15:32:48 +03001528/*[clinic input]
1529_elementtree.Element.makeelement
1530
1531 tag: object
1532 attrib: object
1533 /
1534
1535[clinic start generated code]*/
1536
1537static PyObject *
1538_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1539 PyObject *attrib)
1540/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541{
1542 PyObject* elem;
1543
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544 attrib = PyDict_Copy(attrib);
1545 if (!attrib)
1546 return NULL;
1547
Eli Bendersky092af1f2012-03-04 07:14:03 +02001548 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549
1550 Py_DECREF(attrib);
1551
1552 return elem;
1553}
1554
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555/*[clinic input]
1556_elementtree.Element.remove
1557
1558 subelement: object(subclass_of='&Element_Type')
1559 /
1560
1561[clinic start generated code]*/
1562
1563static PyObject *
1564_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1565/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001567 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001568 int rc;
1569 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571 if (!self->extra) {
1572 /* element has no children, so raise exception */
1573 PyErr_SetString(
1574 PyExc_ValueError,
1575 "list.remove(x): x not in list"
1576 );
1577 return NULL;
1578 }
1579
1580 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001581 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001583 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001584 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001586 if (rc < 0)
1587 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001588 }
1589
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001590 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001591 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001592 PyErr_SetString(
1593 PyExc_ValueError,
1594 "list.remove(x): x not in list"
1595 );
1596 return NULL;
1597 }
1598
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001599 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600
1601 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602 for (; i < self->extra->length; i++)
1603 self->extra->children[i] = self->extra->children[i+1];
1604
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001605 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606 Py_RETURN_NONE;
1607}
1608
1609static PyObject*
1610element_repr(ElementObject* self)
1611{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001612 int status;
1613
1614 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001615 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001616
1617 status = Py_ReprEnter((PyObject *)self);
1618 if (status == 0) {
1619 PyObject *res;
1620 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1621 Py_ReprLeave((PyObject *)self);
1622 return res;
1623 }
1624 if (status > 0)
1625 PyErr_Format(PyExc_RuntimeError,
1626 "reentrant call inside %s.__repr__",
1627 Py_TYPE(self)->tp_name);
1628 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629}
1630
Serhiy Storchakacb985562015-05-04 15:32:48 +03001631/*[clinic input]
1632_elementtree.Element.set
1633
1634 key: object
1635 value: object
1636 /
1637
1638[clinic start generated code]*/
1639
1640static PyObject *
1641_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1642 PyObject *value)
1643/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001644{
1645 PyObject* attrib;
1646
Victor Stinner5f0af232013-07-11 23:01:36 +02001647 if (!self->extra) {
1648 if (create_extra(self, NULL) < 0)
1649 return NULL;
1650 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001651
1652 attrib = element_get_attrib(self);
1653 if (!attrib)
1654 return NULL;
1655
1656 if (PyDict_SetItem(attrib, key, value) < 0)
1657 return NULL;
1658
1659 Py_RETURN_NONE;
1660}
1661
1662static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001663element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001665 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001666 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001667 PyObject* old;
1668
1669 if (!self->extra || index < 0 || index >= self->extra->length) {
1670 PyErr_SetString(
1671 PyExc_IndexError,
1672 "child assignment index out of range");
1673 return -1;
1674 }
1675
1676 old = self->extra->children[index];
1677
1678 if (item) {
1679 Py_INCREF(item);
1680 self->extra->children[index] = item;
1681 } else {
1682 self->extra->length--;
1683 for (i = index; i < self->extra->length; i++)
1684 self->extra->children[i] = self->extra->children[i+1];
1685 }
1686
1687 Py_DECREF(old);
1688
1689 return 0;
1690}
1691
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001692static PyObject*
1693element_subscr(PyObject* self_, PyObject* item)
1694{
1695 ElementObject* self = (ElementObject*) self_;
1696
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001697 if (PyIndex_Check(item)) {
1698 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001699
1700 if (i == -1 && PyErr_Occurred()) {
1701 return NULL;
1702 }
1703 if (i < 0 && self->extra)
1704 i += self->extra->length;
1705 return element_getitem(self_, i);
1706 }
1707 else if (PySlice_Check(item)) {
1708 Py_ssize_t start, stop, step, slicelen, cur, i;
1709 PyObject* list;
1710
1711 if (!self->extra)
1712 return PyList_New(0);
1713
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001714 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001715 self->extra->length,
1716 &start, &stop, &step, &slicelen) < 0) {
1717 return NULL;
1718 }
1719
1720 if (slicelen <= 0)
1721 return PyList_New(0);
1722 else {
1723 list = PyList_New(slicelen);
1724 if (!list)
1725 return NULL;
1726
1727 for (cur = start, i = 0; i < slicelen;
1728 cur += step, i++) {
1729 PyObject* item = self->extra->children[cur];
1730 Py_INCREF(item);
1731 PyList_SET_ITEM(list, i, item);
1732 }
1733
1734 return list;
1735 }
1736 }
1737 else {
1738 PyErr_SetString(PyExc_TypeError,
1739 "element indices must be integers");
1740 return NULL;
1741 }
1742}
1743
1744static int
1745element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1746{
1747 ElementObject* self = (ElementObject*) self_;
1748
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001749 if (PyIndex_Check(item)) {
1750 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001751
1752 if (i == -1 && PyErr_Occurred()) {
1753 return -1;
1754 }
1755 if (i < 0 && self->extra)
1756 i += self->extra->length;
1757 return element_setitem(self_, i, value);
1758 }
1759 else if (PySlice_Check(item)) {
1760 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1761
1762 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001763 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001764
Victor Stinner5f0af232013-07-11 23:01:36 +02001765 if (!self->extra) {
1766 if (create_extra(self, NULL) < 0)
1767 return -1;
1768 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001770 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001771 self->extra->length,
1772 &start, &stop, &step, &slicelen) < 0) {
1773 return -1;
1774 }
1775
Eli Bendersky865756a2012-03-09 13:38:15 +02001776 if (value == NULL) {
1777 /* Delete slice */
1778 size_t cur;
1779 Py_ssize_t i;
1780
1781 if (slicelen <= 0)
1782 return 0;
1783
1784 /* Since we're deleting, the direction of the range doesn't matter,
1785 * so for simplicity make it always ascending.
1786 */
1787 if (step < 0) {
1788 stop = start + 1;
1789 start = stop + step * (slicelen - 1) - 1;
1790 step = -step;
1791 }
1792
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001793 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001794
1795 /* recycle is a list that will contain all the children
1796 * scheduled for removal.
1797 */
1798 if (!(recycle = PyList_New(slicelen))) {
1799 PyErr_NoMemory();
1800 return -1;
1801 }
1802
1803 /* This loop walks over all the children that have to be deleted,
1804 * with cur pointing at them. num_moved is the amount of children
1805 * until the next deleted child that have to be "shifted down" to
1806 * occupy the deleted's places.
1807 * Note that in the ith iteration, shifting is done i+i places down
1808 * because i children were already removed.
1809 */
1810 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1811 /* Compute how many children have to be moved, clipping at the
1812 * list end.
1813 */
1814 Py_ssize_t num_moved = step - 1;
1815 if (cur + step >= (size_t)self->extra->length) {
1816 num_moved = self->extra->length - cur - 1;
1817 }
1818
1819 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1820
1821 memmove(
1822 self->extra->children + cur - i,
1823 self->extra->children + cur + 1,
1824 num_moved * sizeof(PyObject *));
1825 }
1826
1827 /* Leftover "tail" after the last removed child */
1828 cur = start + (size_t)slicelen * step;
1829 if (cur < (size_t)self->extra->length) {
1830 memmove(
1831 self->extra->children + cur - slicelen,
1832 self->extra->children + cur,
1833 (self->extra->length - cur) * sizeof(PyObject *));
1834 }
1835
1836 self->extra->length -= slicelen;
1837
1838 /* Discard the recycle list with all the deleted sub-elements */
1839 Py_XDECREF(recycle);
1840 return 0;
1841 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001842
1843 /* A new slice is actually being assigned */
1844 seq = PySequence_Fast(value, "");
1845 if (!seq) {
1846 PyErr_Format(
1847 PyExc_TypeError,
1848 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1849 );
1850 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001851 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001852 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853
1854 if (step != 1 && newlen != slicelen)
1855 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001856 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001857 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001858 "attempt to assign sequence of size %zd "
1859 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001860 newlen, slicelen
1861 );
1862 return -1;
1863 }
1864
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001865 /* Resize before creating the recycle bin, to prevent refleaks. */
1866 if (newlen > slicelen) {
1867 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001868 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001869 return -1;
1870 }
1871 }
1872
1873 if (slicelen > 0) {
1874 /* to avoid recursive calls to this method (via decref), move
1875 old items to the recycle bin here, and get rid of them when
1876 we're done modifying the element */
1877 recycle = PyList_New(slicelen);
1878 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001879 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001880 return -1;
1881 }
1882 for (cur = start, i = 0; i < slicelen;
1883 cur += step, i++)
1884 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1885 }
1886
1887 if (newlen < slicelen) {
1888 /* delete slice */
1889 for (i = stop; i < self->extra->length; i++)
1890 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1891 } else if (newlen > slicelen) {
1892 /* insert slice */
1893 for (i = self->extra->length-1; i >= stop; i--)
1894 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1895 }
1896
1897 /* replace the slice */
1898 for (cur = start, i = 0; i < newlen;
1899 cur += step, i++) {
1900 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1901 Py_INCREF(element);
1902 self->extra->children[cur] = element;
1903 }
1904
1905 self->extra->length += newlen - slicelen;
1906
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001907 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001908
1909 /* discard the recycle bin, and everything in it */
1910 Py_XDECREF(recycle);
1911
1912 return 0;
1913 }
1914 else {
1915 PyErr_SetString(PyExc_TypeError,
1916 "element indices must be integers");
1917 return -1;
1918 }
1919}
1920
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001921static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001922element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001923{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001924 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001925 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926 return res;
1927}
1928
Serhiy Storchakadde08152015-11-25 15:28:13 +02001929static PyObject*
1930element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001932 PyObject *res = element_get_text(self);
1933 Py_XINCREF(res);
1934 return res;
1935}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001936
Serhiy Storchakadde08152015-11-25 15:28:13 +02001937static PyObject*
1938element_tail_getter(ElementObject *self, void *closure)
1939{
1940 PyObject *res = element_get_tail(self);
1941 Py_XINCREF(res);
1942 return res;
1943}
1944
1945static PyObject*
1946element_attrib_getter(ElementObject *self, void *closure)
1947{
1948 PyObject *res;
1949 if (!self->extra) {
1950 if (create_extra(self, NULL) < 0)
1951 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001952 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001953 res = element_get_attrib(self);
1954 Py_XINCREF(res);
1955 return res;
1956}
Victor Stinner4d463432013-07-11 23:05:03 +02001957
/* Macro for setter validation.  A NULL value is rejected with
   AttributeError("can't delete element attribute") and makes the *enclosing
   function* return -1.  The body is wrapped in do { } while (0) so that the
   macro expands to exactly one statement and stays safe inside an unbraced
   if/else; every invocation must be terminated with a semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1966
Serhiy Storchakadde08152015-11-25 15:28:13 +02001967static int
1968element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1969{
1970 _VALIDATE_ATTR_VALUE(value);
1971 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001972 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001973 return 0;
1974}
1975
1976static int
1977element_text_setter(ElementObject *self, PyObject *value, void *closure)
1978{
1979 _VALIDATE_ATTR_VALUE(value);
1980 Py_INCREF(value);
1981 Py_DECREF(JOIN_OBJ(self->text));
1982 self->text = value;
1983 return 0;
1984}
1985
1986static int
1987element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1988{
1989 _VALIDATE_ATTR_VALUE(value);
1990 Py_INCREF(value);
1991 Py_DECREF(JOIN_OBJ(self->tail));
1992 self->tail = value;
1993 return 0;
1994}
1995
1996static int
1997element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1998{
1999 _VALIDATE_ATTR_VALUE(value);
2000 if (!self->extra) {
2001 if (create_extra(self, NULL) < 0)
2002 return -1;
2003 }
2004 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002005 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002006 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002007}
2008
2009static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002010 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002011 0, /* sq_concat */
2012 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002013 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002014 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002015 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002016 0,
2017};
2018
Eli Bendersky64d11e62012-06-15 07:42:50 +03002019/******************************* Element iterator ****************************/
2020
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
2029typedef struct ParentLocator_t {
2030 ElementObject *parent;
2031 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002032} ParentLocator;
2033
2034typedef struct {
2035 PyObject_HEAD
2036 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002037 Py_ssize_t parent_stack_used;
2038 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002039 ElementObject *root_element;
2040 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002041 int gettext;
2042} ElementIterObject;
2043
2044
2045static void
2046elementiter_dealloc(ElementIterObject *it)
2047{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002048 Py_ssize_t i = it->parent_stack_used;
2049 it->parent_stack_used = 0;
2050 while (i--)
2051 Py_XDECREF(it->parent_stack[i].parent);
2052 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002053
2054 Py_XDECREF(it->sought_tag);
2055 Py_XDECREF(it->root_element);
2056
2057 PyObject_GC_UnTrack(it);
2058 PyObject_GC_Del(it);
2059}
2060
2061static int
2062elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2063{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002064 Py_ssize_t i = it->parent_stack_used;
2065 while (i--)
2066 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002067
2068 Py_VISIT(it->root_element);
2069 Py_VISIT(it->sought_tag);
2070 return 0;
2071}
2072
2073/* Helper function for elementiter_next. Add a new parent to the parent stack.
2074 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002075static int
2076parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002077{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002078 ParentLocator *item;
2079
2080 if (it->parent_stack_used >= it->parent_stack_size) {
2081 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2082 ParentLocator *parent_stack = it->parent_stack;
2083 PyMem_Resize(parent_stack, ParentLocator, new_size);
2084 if (parent_stack == NULL)
2085 return -1;
2086 it->parent_stack = parent_stack;
2087 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002088 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002089 item = it->parent_stack + it->parent_stack_used++;
2090 Py_INCREF(parent);
2091 item->parent = parent;
2092 item->child_index = 0;
2093 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002094}
2095
2096static PyObject *
2097elementiter_next(ElementIterObject *it)
2098{
2099 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002100 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101 * A short note on gettext: this function serves both the iter() and
2102 * itertext() methods to avoid code duplication. However, there are a few
2103 * small differences in the way these iterations work. Namely:
2104 * - itertext() only yields text from nodes that have it, and continues
2105 * iterating when a node doesn't have text (so it doesn't return any
2106 * node like iter())
2107 * - itertext() also has to handle tail, after finishing with all the
2108 * children of a node.
2109 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002110 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002111 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002112 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002113
2114 while (1) {
2115 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002116 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002117 * iterator is exhausted.
2118 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002119 if (!it->parent_stack_used) {
2120 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 PyErr_SetNone(PyExc_StopIteration);
2122 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002123 }
2124
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002125 elem = it->root_element; /* steals a reference */
2126 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002127 }
2128 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002129 /* See if there are children left to traverse in the current parent. If
2130 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002131 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002132 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2133 Py_ssize_t child_index = item->child_index;
2134 ElementObjectExtra *extra;
2135 elem = item->parent;
2136 extra = elem->extra;
2137 if (!extra || child_index >= extra->length) {
2138 it->parent_stack_used--;
2139 /* Note that extra condition on it->parent_stack_used here;
2140 * this is because itertext() is supposed to only return *inner*
2141 * text, not text following the element it began iteration with.
2142 */
2143 if (it->gettext && it->parent_stack_used) {
2144 text = element_get_tail(elem);
2145 goto gettext;
2146 }
2147 Py_DECREF(elem);
2148 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002149 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002150
2151 elem = (ElementObject *)extra->children[child_index];
2152 item->child_index++;
2153 Py_INCREF(elem);
2154 }
2155
2156 if (parent_stack_push_new(it, elem) < 0) {
2157 Py_DECREF(elem);
2158 PyErr_NoMemory();
2159 return NULL;
2160 }
2161 if (it->gettext) {
2162 text = element_get_text(elem);
2163 goto gettext;
2164 }
2165
2166 if (it->sought_tag == Py_None)
2167 return (PyObject *)elem;
2168
2169 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2170 if (rc > 0)
2171 return (PyObject *)elem;
2172
2173 Py_DECREF(elem);
2174 if (rc < 0)
2175 return NULL;
2176 continue;
2177
2178gettext:
2179 if (!text) {
2180 Py_DECREF(elem);
2181 return NULL;
2182 }
2183 if (text == Py_None) {
2184 Py_DECREF(elem);
2185 }
2186 else {
2187 Py_INCREF(text);
2188 Py_DECREF(elem);
2189 rc = PyObject_IsTrue(text);
2190 if (rc > 0)
2191 return text;
2192 Py_DECREF(text);
2193 if (rc < 0)
2194 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002195 }
2196 }
2197
2198 return NULL;
2199}
2200
2201
2202static PyTypeObject ElementIter_Type = {
2203 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002204 /* Using the module's name since the pure-Python implementation does not
2205 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002206 "_elementtree._element_iterator", /* tp_name */
2207 sizeof(ElementIterObject), /* tp_basicsize */
2208 0, /* tp_itemsize */
2209 /* methods */
2210 (destructor)elementiter_dealloc, /* tp_dealloc */
2211 0, /* tp_print */
2212 0, /* tp_getattr */
2213 0, /* tp_setattr */
2214 0, /* tp_reserved */
2215 0, /* tp_repr */
2216 0, /* tp_as_number */
2217 0, /* tp_as_sequence */
2218 0, /* tp_as_mapping */
2219 0, /* tp_hash */
2220 0, /* tp_call */
2221 0, /* tp_str */
2222 0, /* tp_getattro */
2223 0, /* tp_setattro */
2224 0, /* tp_as_buffer */
2225 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2226 0, /* tp_doc */
2227 (traverseproc)elementiter_traverse, /* tp_traverse */
2228 0, /* tp_clear */
2229 0, /* tp_richcompare */
2230 0, /* tp_weaklistoffset */
2231 PyObject_SelfIter, /* tp_iter */
2232 (iternextfunc)elementiter_next, /* tp_iternext */
2233 0, /* tp_methods */
2234 0, /* tp_members */
2235 0, /* tp_getset */
2236 0, /* tp_base */
2237 0, /* tp_dict */
2238 0, /* tp_descr_get */
2239 0, /* tp_descr_set */
2240 0, /* tp_dictoffset */
2241 0, /* tp_init */
2242 0, /* tp_alloc */
2243 0, /* tp_new */
2244};
2245
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002246#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002247
2248static PyObject *
2249create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2250{
2251 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002252
2253 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2254 if (!it)
2255 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002256
Victor Stinner4d463432013-07-11 23:05:03 +02002257 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002258 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002259 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002260 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002261 it->root_element = self;
2262
Eli Bendersky64d11e62012-06-15 07:42:50 +03002263 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002264
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002265 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002266 if (it->parent_stack == NULL) {
2267 Py_DECREF(it);
2268 PyErr_NoMemory();
2269 return NULL;
2270 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002271 it->parent_stack_used = 0;
2272 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002273
Eli Bendersky64d11e62012-06-15 07:42:50 +03002274 return (PyObject *)it;
2275}
2276
2277
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278/* ==================================================================== */
2279/* the tree builder type */
2280
2281typedef struct {
2282 PyObject_HEAD
2283
Eli Bendersky58d548d2012-05-29 15:45:16 +03002284 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
Antoine Pitrouee329312012-10-04 19:53:29 +02002286 PyObject *this; /* current node */
2287 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288
Eli Bendersky58d548d2012-05-29 15:45:16 +03002289 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290
Eli Bendersky58d548d2012-05-29 15:45:16 +03002291 PyObject *stack; /* element stack */
2292 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002293
Eli Bendersky48d358b2012-05-30 17:57:50 +03002294 PyObject *element_factory;
2295
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002297 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002298 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2299 PyObject *end_event_obj;
2300 PyObject *start_ns_event_obj;
2301 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002302} TreeBuilderObject;
2303
Christian Heimes90aa7642007-12-19 02:45:37 +00002304#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305
2306/* -------------------------------------------------------------------- */
2307/* constructor and destructor */
2308
Eli Bendersky58d548d2012-05-29 15:45:16 +03002309static PyObject *
2310treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002311{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002312 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2313 if (t != NULL) {
2314 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002315
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002317 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002318 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002319 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002320
Eli Bendersky58d548d2012-05-29 15:45:16 +03002321 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002322 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 t->stack = PyList_New(20);
2324 if (!t->stack) {
2325 Py_DECREF(t->this);
2326 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002327 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002328 return NULL;
2329 }
2330 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002331
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002332 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002333 t->start_event_obj = t->end_event_obj = NULL;
2334 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2335 }
2336 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337}
2338
Serhiy Storchakacb985562015-05-04 15:32:48 +03002339/*[clinic input]
2340_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002341
Serhiy Storchakacb985562015-05-04 15:32:48 +03002342 element_factory: object = NULL
2343
2344[clinic start generated code]*/
2345
2346static int
2347_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2348 PyObject *element_factory)
2349/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2350{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002351 if (element_factory) {
2352 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002353 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002354 }
2355
Eli Bendersky58d548d2012-05-29 15:45:16 +03002356 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002357}
2358
Eli Bendersky48d358b2012-05-30 17:57:50 +03002359static int
2360treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2361{
2362 Py_VISIT(self->root);
2363 Py_VISIT(self->this);
2364 Py_VISIT(self->last);
2365 Py_VISIT(self->data);
2366 Py_VISIT(self->stack);
2367 Py_VISIT(self->element_factory);
2368 return 0;
2369}
2370
2371static int
2372treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002373{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002374 Py_CLEAR(self->end_ns_event_obj);
2375 Py_CLEAR(self->start_ns_event_obj);
2376 Py_CLEAR(self->end_event_obj);
2377 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002378 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002379 Py_CLEAR(self->stack);
2380 Py_CLEAR(self->data);
2381 Py_CLEAR(self->last);
2382 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002383 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002384 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002385 return 0;
2386}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002387
Eli Bendersky48d358b2012-05-30 17:57:50 +03002388static void
2389treebuilder_dealloc(TreeBuilderObject *self)
2390{
2391 PyObject_GC_UnTrack(self);
2392 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002393 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394}
2395
2396/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002397/* helpers for handling of arbitrary element-like objects */
2398
2399static int
2400treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2401 PyObject **dest, _Py_Identifier *name)
2402{
2403 if (Element_CheckExact(element)) {
2404 Py_DECREF(JOIN_OBJ(*dest));
2405 *dest = JOIN_SET(data, PyList_CheckExact(data));
2406 return 0;
2407 }
2408 else {
2409 PyObject *joined = list_join(data);
2410 int r;
2411 if (joined == NULL)
2412 return -1;
2413 r = _PyObject_SetAttrId(element, name, joined);
2414 Py_DECREF(joined);
2415 return r;
2416 }
2417}
2418
2419/* These two functions steal a reference to data */
2420static int
2421treebuilder_set_element_text(PyObject *element, PyObject *data)
2422{
2423 _Py_IDENTIFIER(text);
2424 return treebuilder_set_element_text_or_tail(
2425 element, data, &((ElementObject *) element)->text, &PyId_text);
2426}
2427
2428static int
2429treebuilder_set_element_tail(PyObject *element, PyObject *data)
2430{
2431 _Py_IDENTIFIER(tail);
2432 return treebuilder_set_element_text_or_tail(
2433 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2434}
2435
2436static int
2437treebuilder_add_subelement(PyObject *element, PyObject *child)
2438{
2439 _Py_IDENTIFIER(append);
2440 if (Element_CheckExact(element)) {
2441 ElementObject *elem = (ElementObject *) element;
2442 return element_add_subelement(elem, child);
2443 }
2444 else {
2445 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002446 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002447 if (res == NULL)
2448 return -1;
2449 Py_DECREF(res);
2450 return 0;
2451 }
2452}
2453
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002454LOCAL(int)
2455treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2456 PyObject *node)
2457{
2458 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002459 PyObject *res;
2460 PyObject *event = PyTuple_Pack(2, action, node);
2461 if (event == NULL)
2462 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002463 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002464 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002465 if (res == NULL)
2466 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002467 Py_DECREF(res);
2468 }
2469 return 0;
2470}
2471
Antoine Pitrouee329312012-10-04 19:53:29 +02002472/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473/* handlers */
2474
2475LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2477 PyObject* attrib)
2478{
2479 PyObject* node;
2480 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002481 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002482
2483 if (self->data) {
2484 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002485 if (treebuilder_set_element_text(self->last, self->data))
2486 return NULL;
2487 }
2488 else {
2489 if (treebuilder_set_element_tail(self->last, self->data))
2490 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491 }
2492 self->data = NULL;
2493 }
2494
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002495 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002496 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002497 } else if (attrib == Py_None) {
2498 attrib = PyDict_New();
2499 if (!attrib)
2500 return NULL;
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002501 node = PyObject_CallFunctionObjArgs(self->element_factory,
2502 tag, attrib, NULL);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002503 Py_DECREF(attrib);
2504 }
2505 else {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002506 node = PyObject_CallFunctionObjArgs(self->element_factory,
2507 tag, attrib, NULL);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002508 }
2509 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002511 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512
Antoine Pitrouee329312012-10-04 19:53:29 +02002513 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514
2515 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002516 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002517 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518 } else {
2519 if (self->root) {
2520 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002521 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 "multiple elements on top level"
2523 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002524 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525 }
2526 Py_INCREF(node);
2527 self->root = node;
2528 }
2529
2530 if (self->index < PyList_GET_SIZE(self->stack)) {
2531 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002532 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 Py_INCREF(this);
2534 } else {
2535 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002536 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537 }
2538 self->index++;
2539
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002541 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002543 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002545 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2546 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547
2548 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002549
2550 error:
2551 Py_DECREF(node);
2552 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553}
2554
2555LOCAL(PyObject*)
2556treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2557{
2558 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002559 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002560 /* ignore calls to data before the first call to start */
2561 Py_RETURN_NONE;
2562 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 /* store the first item as is */
2564 Py_INCREF(data); self->data = data;
2565 } else {
2566 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002567 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2568 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002569 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570 /* expat often generates single character data sections; handle
2571 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002572 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2573 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002575 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 } else if (PyList_CheckExact(self->data)) {
2577 if (PyList_Append(self->data, data) < 0)
2578 return NULL;
2579 } else {
2580 PyObject* list = PyList_New(2);
2581 if (!list)
2582 return NULL;
2583 PyList_SET_ITEM(list, 0, self->data);
2584 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2585 self->data = list;
2586 }
2587 }
2588
2589 Py_RETURN_NONE;
2590}
2591
2592LOCAL(PyObject*)
2593treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2594{
2595 PyObject* item;
2596
2597 if (self->data) {
2598 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002599 if (treebuilder_set_element_text(self->last, self->data))
2600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002602 if (treebuilder_set_element_tail(self->last, self->data))
2603 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 }
2605 self->data = NULL;
2606 }
2607
2608 if (self->index == 0) {
2609 PyErr_SetString(
2610 PyExc_IndexError,
2611 "pop from empty stack"
2612 );
2613 return NULL;
2614 }
2615
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002616 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002617 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002618 self->index--;
2619 self->this = PyList_GET_ITEM(self->stack, self->index);
2620 Py_INCREF(self->this);
2621 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002623 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2624 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625
2626 Py_INCREF(self->last);
2627 return (PyObject*) self->last;
2628}
2629
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630/* -------------------------------------------------------------------- */
2631/* methods (in alphabetical order) */
2632
Serhiy Storchakacb985562015-05-04 15:32:48 +03002633/*[clinic input]
2634_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635
Serhiy Storchakacb985562015-05-04 15:32:48 +03002636 data: object
2637 /
2638
2639[clinic start generated code]*/
2640
2641static PyObject *
2642_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2643/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2644{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002645 return treebuilder_handle_data(self, data);
2646}
2647
Serhiy Storchakacb985562015-05-04 15:32:48 +03002648/*[clinic input]
2649_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650
Serhiy Storchakacb985562015-05-04 15:32:48 +03002651 tag: object
2652 /
2653
2654[clinic start generated code]*/
2655
2656static PyObject *
2657_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2658/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2659{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002660 return treebuilder_handle_end(self, tag);
2661}
2662
2663LOCAL(PyObject*)
2664treebuilder_done(TreeBuilderObject* self)
2665{
2666 PyObject* res;
2667
2668 /* FIXME: check stack size? */
2669
2670 if (self->root)
2671 res = self->root;
2672 else
2673 res = Py_None;
2674
2675 Py_INCREF(res);
2676 return res;
2677}
2678
Serhiy Storchakacb985562015-05-04 15:32:48 +03002679/*[clinic input]
2680_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002681
Serhiy Storchakacb985562015-05-04 15:32:48 +03002682[clinic start generated code]*/
2683
2684static PyObject *
2685_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2686/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2687{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002688 return treebuilder_done(self);
2689}
2690
Serhiy Storchakacb985562015-05-04 15:32:48 +03002691/*[clinic input]
2692_elementtree.TreeBuilder.start
2693
2694 tag: object
2695 attrs: object = None
2696 /
2697
2698[clinic start generated code]*/
2699
2700static PyObject *
2701_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2702 PyObject *attrs)
2703/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002705 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706}
2707
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708/* ==================================================================== */
2709/* the expat interface */
2710
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002713
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call an expat entry point through the cached dispatch table, e.g.
 * EXPAT(ParserFree)(parser).  expat_capi must already have been set. */
#define EXPAT(func) (expat_capi->func)

/* Allocator hooks passed to ParserCreate_MM when a parser is created, so
 * that expat allocates through PyObject_Malloc/Realloc/Free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2722
/* Instance layout of the XMLParser type.  All PyObject* members are
 * owned references that may be NULL; they start out NULL in
 * xmlparser_new() and are filled in by __init__. */
typedef struct {
    PyObject_HEAD

    /* underlying expat parser handle */
    XML_Parser parser;

    /* object that receives the parse events */
    PyObject *target;
    /* dict consulted by expat_default_handler() to resolve entity names */
    PyObject *entity;

    /* dict used by makeuniversal() as a cache: raw name bytes -> str */
    PyObject *names;

    /* "start", "data" and "end" attributes looked up on the target;
       NULL when the lookup failed */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    /* "comment", "pi" and "doctype" attributes looked up on the target;
       NULL when the lookup failed */
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* "close" attribute looked up on the target; NULL when the lookup
       failed */
    PyObject *handle_close;

} XMLParserObject;
2744
/* Forward declarations: expat_start_doctype_handler() compares a looked-up
 * "doctype" attribute against the first function and calls the second
 * one directly, before either is defined in this file. */
static PyObject*
_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject **args, Py_ssize_t nargs, PyObject *kwnames);
static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
                                    PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751/* helpers */
2752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753LOCAL(PyObject*)
2754makeuniversal(XMLParserObject* self, const char* string)
2755{
2756 /* convert a UTF-8 tag/attribute name from the expat parser
2757 to a universal name string */
2758
Antoine Pitrouc1948842012-10-01 23:40:37 +02002759 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760 PyObject* key;
2761 PyObject* value;
2762
2763 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002764 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765 if (!key)
2766 return NULL;
2767
2768 value = PyDict_GetItem(self->names, key);
2769
2770 if (value) {
2771 Py_INCREF(value);
2772 } else {
2773 /* new name. convert to universal name, and decode as
2774 necessary */
2775
2776 PyObject* tag;
2777 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002778 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779
2780 /* look for namespace separator */
2781 for (i = 0; i < size; i++)
2782 if (string[i] == '}')
2783 break;
2784 if (i != size) {
2785 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002786 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002787 if (tag == NULL) {
2788 Py_DECREF(key);
2789 return NULL;
2790 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002791 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792 p[0] = '{';
2793 memcpy(p+1, string, size);
2794 size++;
2795 } else {
2796 /* plain name; use key as tag */
2797 Py_INCREF(key);
2798 tag = key;
2799 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002800
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002802 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002803 value = PyUnicode_DecodeUTF8(p, size, "strict");
2804 Py_DECREF(tag);
2805 if (!value) {
2806 Py_DECREF(key);
2807 return NULL;
2808 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002809
2810 /* add to names dictionary */
2811 if (PyDict_SetItem(self->names, key, value) < 0) {
2812 Py_DECREF(key);
2813 Py_DECREF(value);
2814 return NULL;
2815 }
2816 }
2817
2818 Py_DECREF(key);
2819 return value;
2820}
2821
Eli Bendersky5b77d812012-03-16 08:20:05 +02002822/* Set the ParseError exception with the given parameters.
2823 * If message is not NULL, it's used as the error string. Otherwise, the
2824 * message string is the default for the given error_code.
2825*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002827expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2828 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002829{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002830 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002831 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002832
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002833 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002834 message ? message : EXPAT(ErrorString)(error_code),
2835 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002836 if (errmsg == NULL)
2837 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002838
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002839 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002840 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002841 if (!error)
2842 return;
2843
Eli Bendersky5b77d812012-03-16 08:20:05 +02002844 /* Add code and position attributes */
2845 code = PyLong_FromLong((long)error_code);
2846 if (!code) {
2847 Py_DECREF(error);
2848 return;
2849 }
2850 if (PyObject_SetAttrString(error, "code", code) == -1) {
2851 Py_DECREF(error);
2852 Py_DECREF(code);
2853 return;
2854 }
2855 Py_DECREF(code);
2856
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002857 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002858 if (!position) {
2859 Py_DECREF(error);
2860 return;
2861 }
2862 if (PyObject_SetAttrString(error, "position", position) == -1) {
2863 Py_DECREF(error);
2864 Py_DECREF(position);
2865 return;
2866 }
2867 Py_DECREF(position);
2868
Eli Bendersky532d03e2013-08-10 08:00:39 -07002869 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002870 Py_DECREF(error);
2871}
2872
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002873/* -------------------------------------------------------------------- */
2874/* handlers */
2875
2876static void
2877expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2878 int data_len)
2879{
2880 PyObject* key;
2881 PyObject* value;
2882 PyObject* res;
2883
2884 if (data_len < 2 || data_in[0] != '&')
2885 return;
2886
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002887 if (PyErr_Occurred())
2888 return;
2889
Neal Norwitz0269b912007-08-08 06:56:02 +00002890 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002891 if (!key)
2892 return;
2893
2894 value = PyDict_GetItem(self->entity, key);
2895
2896 if (value) {
2897 if (TreeBuilder_CheckExact(self->target))
2898 res = treebuilder_handle_data(
2899 (TreeBuilderObject*) self->target, value
2900 );
2901 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002902 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002903 else
2904 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002905 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002906 } else if (!PyErr_Occurred()) {
2907 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002908 char message[128] = "undefined entity ";
2909 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002910 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002911 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002913 EXPAT(GetErrorColumnNumber)(self->parser),
2914 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915 );
2916 }
2917
2918 Py_DECREF(key);
2919}
2920
2921static void
2922expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2923 const XML_Char **attrib_in)
2924{
2925 PyObject* res;
2926 PyObject* tag;
2927 PyObject* attrib;
2928 int ok;
2929
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002930 if (PyErr_Occurred())
2931 return;
2932
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 /* tag name */
2934 tag = makeuniversal(self, tag_in);
2935 if (!tag)
2936 return; /* parser will look for errors */
2937
2938 /* attributes */
2939 if (attrib_in[0]) {
2940 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002941 if (!attrib) {
2942 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002944 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 while (attrib_in[0] && attrib_in[1]) {
2946 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002947 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 if (!key || !value) {
2949 Py_XDECREF(value);
2950 Py_XDECREF(key);
2951 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002952 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953 return;
2954 }
2955 ok = PyDict_SetItem(attrib, key, value);
2956 Py_DECREF(value);
2957 Py_DECREF(key);
2958 if (ok < 0) {
2959 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002960 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961 return;
2962 }
2963 attrib_in += 2;
2964 }
2965 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002966 Py_INCREF(Py_None);
2967 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002968 }
2969
2970 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 /* shortcut */
2972 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2973 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002974 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002975 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002976 if (attrib == Py_None) {
2977 Py_DECREF(attrib);
2978 attrib = PyDict_New();
2979 if (!attrib) {
2980 Py_DECREF(tag);
2981 return;
2982 }
2983 }
Victor Stinner5abaa2b2016-12-09 16:22:32 +01002984 res = PyObject_CallFunctionObjArgs(self->handle_start,
2985 tag, attrib, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002986 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 res = NULL;
2988
2989 Py_DECREF(tag);
2990 Py_DECREF(attrib);
2991
2992 Py_XDECREF(res);
2993}
2994
2995static void
2996expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2997 int data_len)
2998{
2999 PyObject* data;
3000 PyObject* res;
3001
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003002 if (PyErr_Occurred())
3003 return;
3004
Neal Norwitz0269b912007-08-08 06:56:02 +00003005 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003006 if (!data)
3007 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008
3009 if (TreeBuilder_CheckExact(self->target))
3010 /* shortcut */
3011 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3012 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003013 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 else
3015 res = NULL;
3016
3017 Py_DECREF(data);
3018
3019 Py_XDECREF(res);
3020}
3021
3022static void
3023expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3024{
3025 PyObject* tag;
3026 PyObject* res = NULL;
3027
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003028 if (PyErr_Occurred())
3029 return;
3030
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 if (TreeBuilder_CheckExact(self->target))
3032 /* shortcut */
3033 /* the standard tree builder doesn't look at the end tag */
3034 res = treebuilder_handle_end(
3035 (TreeBuilderObject*) self->target, Py_None
3036 );
3037 else if (self->handle_end) {
3038 tag = makeuniversal(self, tag_in);
3039 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003040 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003041 Py_DECREF(tag);
3042 }
3043 }
3044
3045 Py_XDECREF(res);
3046}
3047
3048static void
3049expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3050 const XML_Char *uri)
3051{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003052 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3053 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003054
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003055 if (PyErr_Occurred())
3056 return;
3057
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003058 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003059 return;
3060
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003061 if (!uri)
3062 uri = "";
3063 if (!prefix)
3064 prefix = "";
3065
3066 parcel = Py_BuildValue("ss", prefix, uri);
3067 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003068 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003069 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3070 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071}
3072
3073static void
3074expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3075{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003076 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3077
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003078 if (PyErr_Occurred())
3079 return;
3080
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003081 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003082 return;
3083
3084 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085}
3086
3087static void
3088expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3089{
3090 PyObject* comment;
3091 PyObject* res;
3092
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003093 if (PyErr_Occurred())
3094 return;
3095
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003097 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003099 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3100 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003101 Py_XDECREF(res);
3102 Py_DECREF(comment);
3103 }
3104 }
3105}
3106
Eli Bendersky45839902013-01-13 05:14:47 -08003107static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003108expat_start_doctype_handler(XMLParserObject *self,
3109 const XML_Char *doctype_name,
3110 const XML_Char *sysid,
3111 const XML_Char *pubid,
3112 int has_internal_subset)
3113{
3114 PyObject *self_pyobj = (PyObject *)self;
3115 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3116 PyObject *parser_doctype = NULL;
3117 PyObject *res = NULL;
3118
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003119 if (PyErr_Occurred())
3120 return;
3121
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003122 doctype_name_obj = makeuniversal(self, doctype_name);
3123 if (!doctype_name_obj)
3124 return;
3125
3126 if (sysid) {
3127 sysid_obj = makeuniversal(self, sysid);
3128 if (!sysid_obj) {
3129 Py_DECREF(doctype_name_obj);
3130 return;
3131 }
3132 } else {
3133 Py_INCREF(Py_None);
3134 sysid_obj = Py_None;
3135 }
3136
3137 if (pubid) {
3138 pubid_obj = makeuniversal(self, pubid);
3139 if (!pubid_obj) {
3140 Py_DECREF(doctype_name_obj);
3141 Py_DECREF(sysid_obj);
3142 return;
3143 }
3144 } else {
3145 Py_INCREF(Py_None);
3146 pubid_obj = Py_None;
3147 }
3148
3149 /* If the target has a handler for doctype, call it. */
3150 if (self->handle_doctype) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003151 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3152 doctype_name_obj, pubid_obj,
3153 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003154 Py_CLEAR(res);
3155 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003156 else {
3157 /* Now see if the parser itself has a doctype method. If yes and it's
3158 * a custom method, call it but warn about deprecation. If it's only
3159 * the vanilla XMLParser method, do nothing.
3160 */
3161 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3162 if (parser_doctype &&
3163 !(PyCFunction_Check(parser_doctype) &&
3164 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3165 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003166 (PyCFunction) _elementtree_XMLParser_doctype)) {
3167 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3168 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003169 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003170 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003171 Py_DECREF(res);
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003172 res = PyObject_CallFunctionObjArgs(parser_doctype,
3173 doctype_name_obj, pubid_obj,
3174 sysid_obj, NULL);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003175 Py_CLEAR(res);
3176 }
3177 }
3178
3179clear:
3180 Py_XDECREF(parser_doctype);
3181 Py_DECREF(doctype_name_obj);
3182 Py_DECREF(pubid_obj);
3183 Py_DECREF(sysid_obj);
3184}
3185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003186static void
3187expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3188 const XML_Char* data_in)
3189{
3190 PyObject* target;
3191 PyObject* data;
3192 PyObject* res;
3193
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003194 if (PyErr_Occurred())
3195 return;
3196
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003197 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003198 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3199 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200 if (target && data) {
Victor Stinner5abaa2b2016-12-09 16:22:32 +01003201 res = PyObject_CallFunctionObjArgs(self->handle_pi,
3202 target, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003203 Py_XDECREF(res);
3204 Py_DECREF(data);
3205 Py_DECREF(target);
3206 } else {
3207 Py_XDECREF(data);
3208 Py_XDECREF(target);
3209 }
3210 }
3211}
3212
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003213/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214
Eli Bendersky52467b12012-06-01 07:13:08 +03003215static PyObject *
3216xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217{
Eli Bendersky52467b12012-06-01 07:13:08 +03003218 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3219 if (self) {
3220 self->parser = NULL;
3221 self->target = self->entity = self->names = NULL;
3222 self->handle_start = self->handle_data = self->handle_end = NULL;
3223 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003224 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003226 return (PyObject *)self;
3227}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228
Serhiy Storchakacb985562015-05-04 15:32:48 +03003229/*[clinic input]
3230_elementtree.XMLParser.__init__
3231
3232 html: object = NULL
3233 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003234 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003235
3236[clinic start generated code]*/
3237
Eli Bendersky52467b12012-06-01 07:13:08 +03003238static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003239_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3240 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003241/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003242{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003243 self->entity = PyDict_New();
3244 if (!self->entity)
3245 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246
Serhiy Storchakacb985562015-05-04 15:32:48 +03003247 self->names = PyDict_New();
3248 if (!self->names) {
3249 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003250 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003252
Serhiy Storchakacb985562015-05-04 15:32:48 +03003253 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3254 if (!self->parser) {
3255 Py_CLEAR(self->entity);
3256 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003258 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 }
3260
Eli Bendersky52467b12012-06-01 07:13:08 +03003261 if (target) {
3262 Py_INCREF(target);
3263 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003264 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266 Py_CLEAR(self->entity);
3267 Py_CLEAR(self->names);
3268 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003269 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003271 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003272 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273
Serhiy Storchakacb985562015-05-04 15:32:48 +03003274 self->handle_start = PyObject_GetAttrString(target, "start");
3275 self->handle_data = PyObject_GetAttrString(target, "data");
3276 self->handle_end = PyObject_GetAttrString(target, "end");
3277 self->handle_comment = PyObject_GetAttrString(target, "comment");
3278 self->handle_pi = PyObject_GetAttrString(target, "pi");
3279 self->handle_close = PyObject_GetAttrString(target, "close");
3280 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281
3282 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003283
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003285 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003287 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 (XML_StartElementHandler) expat_start_handler,
3289 (XML_EndElementHandler) expat_end_handler
3290 );
3291 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003292 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003293 (XML_DefaultHandler) expat_default_handler
3294 );
3295 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 (XML_CharacterDataHandler) expat_data_handler
3298 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003299 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 (XML_CommentHandler) expat_comment_handler
3303 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003306 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 (XML_ProcessingInstructionHandler) expat_pi_handler
3308 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003309 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003310 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003311 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3312 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003313 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003314 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003315 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003316 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317
Eli Bendersky52467b12012-06-01 07:13:08 +03003318 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319}
3320
Eli Bendersky52467b12012-06-01 07:13:08 +03003321static int
3322xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3323{
3324 Py_VISIT(self->handle_close);
3325 Py_VISIT(self->handle_pi);
3326 Py_VISIT(self->handle_comment);
3327 Py_VISIT(self->handle_end);
3328 Py_VISIT(self->handle_data);
3329 Py_VISIT(self->handle_start);
3330
3331 Py_VISIT(self->target);
3332 Py_VISIT(self->entity);
3333 Py_VISIT(self->names);
3334
3335 return 0;
3336}
3337
3338static int
3339xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340{
3341 EXPAT(ParserFree)(self->parser);
3342
Antoine Pitrouc1948842012-10-01 23:40:37 +02003343 Py_CLEAR(self->handle_close);
3344 Py_CLEAR(self->handle_pi);
3345 Py_CLEAR(self->handle_comment);
3346 Py_CLEAR(self->handle_end);
3347 Py_CLEAR(self->handle_data);
3348 Py_CLEAR(self->handle_start);
3349 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350
Antoine Pitrouc1948842012-10-01 23:40:37 +02003351 Py_CLEAR(self->target);
3352 Py_CLEAR(self->entity);
3353 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354
Eli Bendersky52467b12012-06-01 07:13:08 +03003355 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003356}
3357
Eli Bendersky52467b12012-06-01 07:13:08 +03003358static void
3359xmlparser_dealloc(XMLParserObject* self)
3360{
3361 PyObject_GC_UnTrack(self);
3362 xmlparser_gc_clear(self);
3363 Py_TYPE(self)->tp_free((PyObject *)self);
3364}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003365
3366LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003367expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368{
3369 int ok;
3370
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003371 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3373
3374 if (PyErr_Occurred())
3375 return NULL;
3376
3377 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003378 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003379 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003381 EXPAT(GetErrorColumnNumber)(self->parser),
3382 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383 );
3384 return NULL;
3385 }
3386
3387 Py_RETURN_NONE;
3388}
3389
Serhiy Storchakacb985562015-05-04 15:32:48 +03003390/*[clinic input]
3391_elementtree.XMLParser.close
3392
3393[clinic start generated code]*/
3394
3395static PyObject *
3396_elementtree_XMLParser_close_impl(XMLParserObject *self)
3397/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398{
3399 /* end feeding data to parser */
3400
3401 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003403 if (!res)
3404 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003406 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407 Py_DECREF(res);
3408 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003409 }
3410 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003411 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003412 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003413 }
3414 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003415 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003416 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003417}
3418
Serhiy Storchakacb985562015-05-04 15:32:48 +03003419/*[clinic input]
3420_elementtree.XMLParser.feed
3421
3422 data: object
3423 /
3424
3425[clinic start generated code]*/
3426
3427static PyObject *
3428_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3429/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003430{
3431 /* feed data to parser */
3432
Serhiy Storchakacb985562015-05-04 15:32:48 +03003433 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003434 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003435 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3436 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003437 return NULL;
3438 if (data_len > INT_MAX) {
3439 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3440 return NULL;
3441 }
3442 /* Explicitly set UTF-8 encoding. Return code ignored. */
3443 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003444 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003445 }
3446 else {
3447 Py_buffer view;
3448 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003449 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003450 return NULL;
3451 if (view.len > INT_MAX) {
3452 PyBuffer_Release(&view);
3453 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3454 return NULL;
3455 }
3456 res = expat_parse(self, view.buf, (int)view.len, 0);
3457 PyBuffer_Release(&view);
3458 return res;
3459 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460}
3461
Serhiy Storchakacb985562015-05-04 15:32:48 +03003462/*[clinic input]
3463_elementtree.XMLParser._parse_whole
3464
3465 file: object
3466 /
3467
3468[clinic start generated code]*/
3469
3470static PyObject *
3471_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3472/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473{
Eli Benderskya3699232013-05-19 18:47:23 -07003474 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475 PyObject* reader;
3476 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003477 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478 PyObject* res;
3479
Serhiy Storchakacb985562015-05-04 15:32:48 +03003480 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003481 if (!reader)
3482 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003483
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003484 /* read from open file object */
3485 for (;;) {
3486
3487 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3488
3489 if (!buffer) {
3490 /* read failed (e.g. due to KeyboardInterrupt) */
3491 Py_DECREF(reader);
3492 return NULL;
3493 }
3494
Eli Benderskyf996e772012-03-16 05:53:30 +02003495 if (PyUnicode_CheckExact(buffer)) {
3496 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003497 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003498 Py_DECREF(buffer);
3499 break;
3500 }
3501 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003502 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003503 if (!temp) {
3504 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003505 Py_DECREF(reader);
3506 return NULL;
3507 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003508 buffer = temp;
3509 }
3510 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511 Py_DECREF(buffer);
3512 break;
3513 }
3514
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003515 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3516 Py_DECREF(buffer);
3517 Py_DECREF(reader);
3518 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3519 return NULL;
3520 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003522 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003523 );
3524
3525 Py_DECREF(buffer);
3526
3527 if (!res) {
3528 Py_DECREF(reader);
3529 return NULL;
3530 }
3531 Py_DECREF(res);
3532
3533 }
3534
3535 Py_DECREF(reader);
3536
3537 res = expat_parse(self, "", 0, 1);
3538
3539 if (res && TreeBuilder_CheckExact(self->target)) {
3540 Py_DECREF(res);
3541 return treebuilder_done((TreeBuilderObject*) self->target);
3542 }
3543
3544 return res;
3545}
3546
Serhiy Storchakacb985562015-05-04 15:32:48 +03003547/*[clinic input]
3548_elementtree.XMLParser.doctype
3549
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003550 name: object
3551 pubid: object
3552 system: object
3553 /
3554
Serhiy Storchakacb985562015-05-04 15:32:48 +03003555[clinic start generated code]*/
3556
3557static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003558_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3559 PyObject *pubid, PyObject *system)
3560/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003561{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003562 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3563 "This method of XMLParser is deprecated. Define"
3564 " doctype() method on the TreeBuilder target.",
3565 1) < 0) {
3566 return NULL;
3567 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003568 Py_RETURN_NONE;
3569}
3570
Serhiy Storchakacb985562015-05-04 15:32:48 +03003571/*[clinic input]
3572_elementtree.XMLParser._setevents
3573
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003574 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003575 events_to_report: object = None
3576 /
3577
3578[clinic start generated code]*/
3579
3580static PyObject *
3581_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3582 PyObject *events_queue,
3583 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003584/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585{
3586 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003587 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003588 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003589 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003590
3591 if (!TreeBuilder_CheckExact(self->target)) {
3592 PyErr_SetString(
3593 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003594 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595 "targets"
3596 );
3597 return NULL;
3598 }
3599
3600 target = (TreeBuilderObject*) self->target;
3601
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003602 events_append = PyObject_GetAttrString(events_queue, "append");
3603 if (events_append == NULL)
3604 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003605 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606
3607 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003608 Py_CLEAR(target->start_event_obj);
3609 Py_CLEAR(target->end_event_obj);
3610 Py_CLEAR(target->start_ns_event_obj);
3611 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003612
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003613 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003615 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616 Py_RETURN_NONE;
3617 }
3618
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003619 if (!(events_seq = PySequence_Fast(events_to_report,
3620 "events must be a sequence"))) {
3621 return NULL;
3622 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003623
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003624 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003625 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003626 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003627 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003628 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003629 } else if (PyBytes_Check(event_name_obj)) {
3630 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003631 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003632 if (event_name == NULL) {
3633 Py_DECREF(events_seq);
3634 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3635 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003636 }
3637
3638 Py_INCREF(event_name_obj);
3639 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003640 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003641 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003642 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003643 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003644 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003645 EXPAT(SetNamespaceDeclHandler)(
3646 self->parser,
3647 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3648 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3649 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003650 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003651 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003652 EXPAT(SetNamespaceDeclHandler)(
3653 self->parser,
3654 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3655 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3656 );
3657 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003658 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003659 Py_DECREF(events_seq);
3660 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003661 return NULL;
3662 }
3663 }
3664
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003665 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667}
3668
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003669static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003670xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003672 if (PyUnicode_Check(nameobj)) {
3673 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003674 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003675 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003676 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003677 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003678 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003679 return PyUnicode_FromFormat(
3680 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003682 }
3683 else
3684 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685
Alexander Belopolskye239d232010-12-08 23:31:48 +00003686 Py_INCREF(res);
3687 return res;
3688 }
3689 generic:
3690 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003691}
3692
Serhiy Storchakacb985562015-05-04 15:32:48 +03003693#include "clinic/_elementtree.c.h"
3694
3695static PyMethodDef element_methods[] = {
3696
3697 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3698
3699 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3700 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3701
3702 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3703 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3704 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3705
3706 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3707 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3708 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3709 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3710
3711 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3712 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3713 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3714
Victor Stinner84d8baa2016-09-29 22:12:35 +02003715 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_FASTCALL, _elementtree_Element_iter__doc__},
Serhiy Storchakacb985562015-05-04 15:32:48 +03003716 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3717
3718 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3719 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3720
3721 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3722
3723 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3724 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3725 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3726 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3727 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3728
3729 {NULL, NULL}
3730};
3731
3732static PyMappingMethods element_as_mapping = {
3733 (lenfunc) element_length,
3734 (binaryfunc) element_subscr,
3735 (objobjargproc) element_ass_subscr,
3736};
3737
Serhiy Storchakadde08152015-11-25 15:28:13 +02003738static PyGetSetDef element_getsetlist[] = {
3739 {"tag",
3740 (getter)element_tag_getter,
3741 (setter)element_tag_setter,
3742 "A string identifying what kind of data this element represents"},
3743 {"text",
3744 (getter)element_text_getter,
3745 (setter)element_text_setter,
3746 "A string of text directly after the start tag, or None"},
3747 {"tail",
3748 (getter)element_tail_getter,
3749 (setter)element_tail_setter,
3750 "A string of text directly after the end tag, or None"},
3751 {"attrib",
3752 (getter)element_attrib_getter,
3753 (setter)element_attrib_setter,
3754 "A dictionary containing the element's attributes"},
3755 {NULL},
3756};
3757
Serhiy Storchakacb985562015-05-04 15:32:48 +03003758static PyTypeObject Element_Type = {
3759 PyVarObject_HEAD_INIT(NULL, 0)
3760 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3761 /* methods */
3762 (destructor)element_dealloc, /* tp_dealloc */
3763 0, /* tp_print */
3764 0, /* tp_getattr */
3765 0, /* tp_setattr */
3766 0, /* tp_reserved */
3767 (reprfunc)element_repr, /* tp_repr */
3768 0, /* tp_as_number */
3769 &element_as_sequence, /* tp_as_sequence */
3770 &element_as_mapping, /* tp_as_mapping */
3771 0, /* tp_hash */
3772 0, /* tp_call */
3773 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003774 PyObject_GenericGetAttr, /* tp_getattro */
3775 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003776 0, /* tp_as_buffer */
3777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3778 /* tp_flags */
3779 0, /* tp_doc */
3780 (traverseproc)element_gc_traverse, /* tp_traverse */
3781 (inquiry)element_gc_clear, /* tp_clear */
3782 0, /* tp_richcompare */
3783 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3784 0, /* tp_iter */
3785 0, /* tp_iternext */
3786 element_methods, /* tp_methods */
3787 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003788 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003789 0, /* tp_base */
3790 0, /* tp_dict */
3791 0, /* tp_descr_get */
3792 0, /* tp_descr_set */
3793 0, /* tp_dictoffset */
3794 (initproc)element_init, /* tp_init */
3795 PyType_GenericAlloc, /* tp_alloc */
3796 element_new, /* tp_new */
3797 0, /* tp_free */
3798};
3799
3800static PyMethodDef treebuilder_methods[] = {
3801 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3802 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3803 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3804 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3805 {NULL, NULL}
3806};
3807
3808static PyTypeObject TreeBuilder_Type = {
3809 PyVarObject_HEAD_INIT(NULL, 0)
3810 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3811 /* methods */
3812 (destructor)treebuilder_dealloc, /* tp_dealloc */
3813 0, /* tp_print */
3814 0, /* tp_getattr */
3815 0, /* tp_setattr */
3816 0, /* tp_reserved */
3817 0, /* tp_repr */
3818 0, /* tp_as_number */
3819 0, /* tp_as_sequence */
3820 0, /* tp_as_mapping */
3821 0, /* tp_hash */
3822 0, /* tp_call */
3823 0, /* tp_str */
3824 0, /* tp_getattro */
3825 0, /* tp_setattro */
3826 0, /* tp_as_buffer */
3827 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3828 /* tp_flags */
3829 0, /* tp_doc */
3830 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3831 (inquiry)treebuilder_gc_clear, /* tp_clear */
3832 0, /* tp_richcompare */
3833 0, /* tp_weaklistoffset */
3834 0, /* tp_iter */
3835 0, /* tp_iternext */
3836 treebuilder_methods, /* tp_methods */
3837 0, /* tp_members */
3838 0, /* tp_getset */
3839 0, /* tp_base */
3840 0, /* tp_dict */
3841 0, /* tp_descr_get */
3842 0, /* tp_descr_set */
3843 0, /* tp_dictoffset */
3844 _elementtree_TreeBuilder___init__, /* tp_init */
3845 PyType_GenericAlloc, /* tp_alloc */
3846 treebuilder_new, /* tp_new */
3847 0, /* tp_free */
3848};
3849
3850static PyMethodDef xmlparser_methods[] = {
3851 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3852 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3853 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3854 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3855 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3856 {NULL, NULL}
3857};
3858
Neal Norwitz227b5332006-03-22 09:28:35 +00003859static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003860 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003861 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003862 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003863 (destructor)xmlparser_dealloc, /* tp_dealloc */
3864 0, /* tp_print */
3865 0, /* tp_getattr */
3866 0, /* tp_setattr */
3867 0, /* tp_reserved */
3868 0, /* tp_repr */
3869 0, /* tp_as_number */
3870 0, /* tp_as_sequence */
3871 0, /* tp_as_mapping */
3872 0, /* tp_hash */
3873 0, /* tp_call */
3874 0, /* tp_str */
3875 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3876 0, /* tp_setattro */
3877 0, /* tp_as_buffer */
3878 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3879 /* tp_flags */
3880 0, /* tp_doc */
3881 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3882 (inquiry)xmlparser_gc_clear, /* tp_clear */
3883 0, /* tp_richcompare */
3884 0, /* tp_weaklistoffset */
3885 0, /* tp_iter */
3886 0, /* tp_iternext */
3887 xmlparser_methods, /* tp_methods */
3888 0, /* tp_members */
3889 0, /* tp_getset */
3890 0, /* tp_base */
3891 0, /* tp_dict */
3892 0, /* tp_descr_get */
3893 0, /* tp_descr_set */
3894 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003895 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003896 PyType_GenericAlloc, /* tp_alloc */
3897 xmlparser_new, /* tp_new */
3898 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003899};
3900
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003901/* ==================================================================== */
3902/* python module interface */
3903
3904static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003905 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003906 {NULL, NULL}
3907};
3908
Martin v. Löwis1a214512008-06-11 05:26:20 +00003909
Eli Bendersky532d03e2013-08-10 08:00:39 -07003910static struct PyModuleDef elementtreemodule = {
3911 PyModuleDef_HEAD_INIT,
3912 "_elementtree",
3913 NULL,
3914 sizeof(elementtreestate),
3915 _functions,
3916 NULL,
3917 elementtree_traverse,
3918 elementtree_clear,
3919 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003920};
3921
Neal Norwitzf6657e62006-12-28 04:47:50 +00003922PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003923PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003924{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003925 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003926 elementtreestate *st;
3927
3928 m = PyState_FindModule(&elementtreemodule);
3929 if (m) {
3930 Py_INCREF(m);
3931 return m;
3932 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003933
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003934 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003935 if (PyType_Ready(&ElementIter_Type) < 0)
3936 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003937 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003938 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003939 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003940 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003941 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003942 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003943
Eli Bendersky532d03e2013-08-10 08:00:39 -07003944 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003945 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003946 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003947 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003948
Eli Bendersky828efde2012-04-05 05:40:58 +03003949 if (!(temp = PyImport_ImportModule("copy")))
3950 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003951 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003952 Py_XDECREF(temp);
3953
Eli Bendersky532d03e2013-08-10 08:00:39 -07003954 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003955 return NULL;
3956
Eli Bendersky20d41742012-06-01 09:48:37 +03003957 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003958 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3959 if (expat_capi) {
3960 /* check that it's usable */
3961 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003962 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003963 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3964 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003965 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003966 PyErr_SetString(PyExc_ImportError,
3967 "pyexpat version is incompatible");
3968 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003969 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003970 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003971 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003972 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003973
Eli Bendersky532d03e2013-08-10 08:00:39 -07003974 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003975 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003976 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003977 Py_INCREF(st->parseerror_obj);
3978 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003979
Eli Bendersky092af1f2012-03-04 07:14:03 +02003980 Py_INCREF((PyObject *)&Element_Type);
3981 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3982
Eli Bendersky58d548d2012-05-29 15:45:16 +03003983 Py_INCREF((PyObject *)&TreeBuilder_Type);
3984 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3985
Eli Bendersky52467b12012-06-01 07:13:08 +03003986 Py_INCREF((PyObject *)&XMLParser_Type);
3987 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003988
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003989 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003990}