blob: 71245c23d96c6508314a69cb47790059681e7e1d [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in the per-element extra
   block; an element with at most this many children needs no separate
   child-array allocation. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC
   and RELEASE maintain a running byte count and print it together with
   the given comment; in the default branch both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) introduces a file-local function that
   returns `type`; under MSVC it additionally asks for inlining and the
   __fastcall calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* The lowest bit of a text/tail pointer is used to store a 'join' flag,
   so every use of text and tail as an object pointer must be wrapped in
   JOIN_OBJ.  See the comments in the ElementObject definition for more
   info.

   JOIN_OBJ(p)        the pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit of the pointer
   JOIN_SET(p, flag)  the pointer with the flag bit cleared, then OR-ed
                      with flag */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
Christian Heimes90aa7642007-12-19 02:45:37 +0000201#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300400 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
511 PyObject* res = self->text;
512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
516 res = list_join(res);
517 if (!res)
518 return NULL;
519 self->text = res;
520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
531 PyObject* res = self->tail;
532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
536 res = list_join(res);
537 if (!res)
538 return NULL;
539 self->tail = res;
540 }
541 }
542
543 return res;
544}
545
546static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300547subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548{
549 PyObject* elem;
550
551 ElementObject* parent;
552 PyObject* tag;
553 PyObject* attrib = NULL;
554 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
555 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800556 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (attrib) {
561 /* attrib passed as positional arg */
562 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 if (!attrib)
564 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300565 if (kwds) {
566 if (PyDict_Update(attrib, kwds) < 0) {
567 return NULL;
568 }
569 }
570 } else if (kwds) {
571 /* have keyword args */
572 attrib = get_attrib_from_keywords(kwds);
573 if (!attrib)
574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300576 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 Py_INCREF(Py_None);
578 attrib = Py_None;
579 }
580
Eli Bendersky092af1f2012-03-04 07:14:03 +0200581 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200583 if (elem == NULL)
584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000586 if (element_add_subelement(parent, elem) < 0) {
587 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590
591 return elem;
592}
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594static int
595element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
596{
597 Py_VISIT(self->tag);
598 Py_VISIT(JOIN_OBJ(self->text));
599 Py_VISIT(JOIN_OBJ(self->tail));
600
601 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200602 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300603 Py_VISIT(self->extra->attrib);
604
605 for (i = 0; i < self->extra->length; ++i)
606 Py_VISIT(self->extra->children[i]);
607 }
608 return 0;
609}
610
611static int
612element_gc_clear(ElementObject *self)
613{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700615 _clear_joined_ptr(&self->text);
616 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300617
618 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300619 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 return 0;
623}
624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625static void
626element_dealloc(ElementObject* self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300629
630 if (self->weakreflist != NULL)
631 PyObject_ClearWeakRefs((PyObject *) self);
632
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 /* element_gc_clear clears all references and deallocates extra
634 */
635 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000636
637 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200638 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639}
640
641/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
Serhiy Storchakacb985562015-05-04 15:32:48 +0300643/*[clinic input]
644_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000645
Serhiy Storchakacb985562015-05-04 15:32:48 +0300646 subelement: object(subclass_of='&Element_Type')
647 /
648
649[clinic start generated code]*/
650
651static PyObject *
652_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
653/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
654{
655 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656 return NULL;
657
658 Py_RETURN_NONE;
659}
660
Serhiy Storchakacb985562015-05-04 15:32:48 +0300661/*[clinic input]
662_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
Serhiy Storchakacb985562015-05-04 15:32:48 +0300664[clinic start generated code]*/
665
666static PyObject *
667_elementtree_Element_clear_impl(ElementObject *self)
668/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
669{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
672 Py_INCREF(Py_None);
673 Py_DECREF(JOIN_OBJ(self->text));
674 self->text = Py_None;
675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->tail));
678 self->tail = Py_None;
679
680 Py_RETURN_NONE;
681}
682
Serhiy Storchakacb985562015-05-04 15:32:48 +0300683/*[clinic input]
684_elementtree.Element.__copy__
685
686[clinic start generated code]*/
687
688static PyObject *
689_elementtree_Element___copy___impl(ElementObject *self)
690/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200692 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 ElementObject* element;
694
Eli Bendersky092af1f2012-03-04 07:14:03 +0200695 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800696 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 if (!element)
698 return NULL;
699
700 Py_DECREF(JOIN_OBJ(element->text));
701 element->text = self->text;
702 Py_INCREF(JOIN_OBJ(element->text));
703
704 Py_DECREF(JOIN_OBJ(element->tail));
705 element->tail = self->tail;
706 Py_INCREF(JOIN_OBJ(element->tail));
707
708 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000709 if (element_resize(element, self->extra->length) < 0) {
710 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 for (i = 0; i < self->extra->length; i++) {
715 Py_INCREF(self->extra->children[i]);
716 element->extra->children[i] = self->extra->children[i];
717 }
718
719 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720 }
721
722 return (PyObject*) element;
723}
724
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200725/* Helper for a deep copy. */
726LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
727
Serhiy Storchakacb985562015-05-04 15:32:48 +0300728/*[clinic input]
729_elementtree.Element.__deepcopy__
730
731 memo: object
732 /
733
734[clinic start generated code]*/
735
736static PyObject *
737_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
738/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200740 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741 ElementObject* element;
742 PyObject* tag;
743 PyObject* attrib;
744 PyObject* text;
745 PyObject* tail;
746 PyObject* id;
747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 tag = deepcopy(self->tag, memo);
749 if (!tag)
750 return NULL;
751
752 if (self->extra) {
753 attrib = deepcopy(self->extra->attrib, memo);
754 if (!attrib) {
755 Py_DECREF(tag);
756 return NULL;
757 }
758 } else {
759 Py_INCREF(Py_None);
760 attrib = Py_None;
761 }
762
Eli Bendersky092af1f2012-03-04 07:14:03 +0200763 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764
765 Py_DECREF(tag);
766 Py_DECREF(attrib);
767
768 if (!element)
769 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 text = deepcopy(JOIN_OBJ(self->text), memo);
772 if (!text)
773 goto error;
774 Py_DECREF(element->text);
775 element->text = JOIN_SET(text, JOIN_GET(self->text));
776
777 tail = deepcopy(JOIN_OBJ(self->tail), memo);
778 if (!tail)
779 goto error;
780 Py_DECREF(element->tail);
781 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
782
783 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 if (element_resize(element, self->extra->length) < 0)
785 goto error;
786
787 for (i = 0; i < self->extra->length; i++) {
788 PyObject* child = deepcopy(self->extra->children[i], memo);
789 if (!child) {
790 element->extra->length = i;
791 goto error;
792 }
793 element->extra->children[i] = child;
794 }
795
796 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 }
798
799 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700800 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000801 if (!id)
802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 i = PyDict_SetItem(memo, id, (PyObject*) element);
805
806 Py_DECREF(id);
807
808 if (i < 0)
809 goto error;
810
811 return (PyObject*) element;
812
813 error:
814 Py_DECREF(element);
815 return NULL;
816}
817
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200818LOCAL(PyObject *)
819deepcopy(PyObject *object, PyObject *memo)
820{
821 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200822 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200823 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200824
825 /* Fast paths */
826 if (object == Py_None || PyUnicode_CheckExact(object)) {
827 Py_INCREF(object);
828 return object;
829 }
830
831 if (Py_REFCNT(object) == 1) {
832 if (PyDict_CheckExact(object)) {
833 PyObject *key, *value;
834 Py_ssize_t pos = 0;
835 int simple = 1;
836 while (PyDict_Next(object, &pos, &key, &value)) {
837 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
838 simple = 0;
839 break;
840 }
841 }
842 if (simple)
843 return PyDict_Copy(object);
844 /* Fall through to general case */
845 }
846 else if (Element_CheckExact(object)) {
847 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
848 }
849 }
850
851 /* General case */
852 st = ET_STATE_GLOBAL;
853 if (!st->deepcopy_obj) {
854 PyErr_SetString(PyExc_RuntimeError,
855 "deepcopy helper not found");
856 return NULL;
857 }
858
Victor Stinner7fbac452016-08-20 01:34:44 +0200859 stack[0] = object;
860 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200861 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200862}
863
864
Serhiy Storchakacb985562015-05-04 15:32:48 +0300865/*[clinic input]
866_elementtree.Element.__sizeof__ -> Py_ssize_t
867
868[clinic start generated code]*/
869
870static Py_ssize_t
871_elementtree_Element___sizeof___impl(ElementObject *self)
872/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200873{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200874 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200875 if (self->extra) {
876 result += sizeof(ElementObjectExtra);
877 if (self->extra->children != self->extra->_children)
878 result += sizeof(PyObject*) * self->extra->allocated;
879 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300880 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200881}
882
Eli Bendersky698bdb22013-01-10 06:01:06 -0800883/* dict keys for getstate/setstate. */
884#define PICKLED_TAG "tag"
885#define PICKLED_CHILDREN "_children"
886#define PICKLED_ATTRIB "attrib"
887#define PICKLED_TAIL "tail"
888#define PICKLED_TEXT "text"
889
890/* __getstate__ returns a fabricated instance dict as in the pure-Python
891 * Element implementation, for interoperability/interchangeability. This
892 * makes the pure-Python implementation details an API, but (a) there aren't
893 * any unnecessary structures there; and (b) it buys compatibility with 3.2
894 * pickles. See issue #16076.
895 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300896/*[clinic input]
897_elementtree.Element.__getstate__
898
899[clinic start generated code]*/
900
Eli Bendersky698bdb22013-01-10 06:01:06 -0800901static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300902_elementtree_Element___getstate___impl(ElementObject *self)
903/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800904{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200905 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906 PyObject *instancedict = NULL, *children;
907
908 /* Build a list of children. */
909 children = PyList_New(self->extra ? self->extra->length : 0);
910 if (!children)
911 return NULL;
912 for (i = 0; i < PyList_GET_SIZE(children); i++) {
913 PyObject *child = self->extra->children[i];
914 Py_INCREF(child);
915 PyList_SET_ITEM(children, i, child);
916 }
917
918 /* Construct the state object. */
919 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
920 if (noattrib)
921 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
922 PICKLED_TAG, self->tag,
923 PICKLED_CHILDREN, children,
924 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700925 PICKLED_TEXT, JOIN_OBJ(self->text),
926 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800927 else
928 instancedict = Py_BuildValue("{sOsOsOsOsO}",
929 PICKLED_TAG, self->tag,
930 PICKLED_CHILDREN, children,
931 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700932 PICKLED_TEXT, JOIN_OBJ(self->text),
933 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800934 if (instancedict) {
935 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800936 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800937 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 else {
939 for (i = 0; i < PyList_GET_SIZE(children); i++)
940 Py_DECREF(PyList_GET_ITEM(children, i));
941 Py_DECREF(children);
942
943 return NULL;
944 }
945}
946
947static PyObject *
948element_setstate_from_attributes(ElementObject *self,
949 PyObject *tag,
950 PyObject *attrib,
951 PyObject *text,
952 PyObject *tail,
953 PyObject *children)
954{
955 Py_ssize_t i, nchildren;
956
957 if (!tag) {
958 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
959 return NULL;
960 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800961
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200962 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300963 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964
Eli Benderskydd3661e2013-09-13 06:24:25 -0700965 _clear_joined_ptr(&self->text);
966 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
967 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->tail);
970 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
973 /* Handle ATTRIB and CHILDREN. */
974 if (!children && !attrib)
975 Py_RETURN_NONE;
976
977 /* Compute 'nchildren'. */
978 if (children) {
979 if (!PyList_Check(children)) {
980 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
981 return NULL;
982 }
983 nchildren = PyList_Size(children);
984 }
985 else {
986 nchildren = 0;
987 }
988
989 /* Allocate 'extra'. */
990 if (element_resize(self, nchildren)) {
991 return NULL;
992 }
993 assert(self->extra && self->extra->allocated >= nchildren);
994
995 /* Copy children */
996 for (i = 0; i < nchildren; i++) {
997 self->extra->children[i] = PyList_GET_ITEM(children, i);
998 Py_INCREF(self->extra->children[i]);
999 }
1000
1001 self->extra->length = nchildren;
1002 self->extra->allocated = nchildren;
1003
1004 /* Stash attrib. */
1005 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001006 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001007 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008 }
1009
1010 Py_RETURN_NONE;
1011}
1012
1013/* __setstate__ for Element instance from the Python implementation.
1014 * 'state' should be the instance dict.
1015 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001016
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017static PyObject *
1018element_setstate_from_Python(ElementObject *self, PyObject *state)
1019{
1020 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1021 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1022 PyObject *args;
1023 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001024 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001025
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026 tag = attrib = text = tail = children = NULL;
1027 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030
1031 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1032 &attrib, &text, &tail, &children))
1033 retval = element_setstate_from_attributes(self, tag, attrib, text,
1034 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001035 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001036 retval = NULL;
1037
1038 Py_DECREF(args);
1039 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001040}
1041
Serhiy Storchakacb985562015-05-04 15:32:48 +03001042/*[clinic input]
1043_elementtree.Element.__setstate__
1044
1045 state: object
1046 /
1047
1048[clinic start generated code]*/
1049
Eli Bendersky698bdb22013-01-10 06:01:06 -08001050static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001051_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1052/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001053{
1054 if (!PyDict_CheckExact(state)) {
1055 PyErr_Format(PyExc_TypeError,
1056 "Don't know how to unpickle \"%.200R\" as an Element",
1057 state);
1058 return NULL;
1059 }
1060 else
1061 return element_setstate_from_Python(self, state);
1062}
1063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064LOCAL(int)
1065checkpath(PyObject* tag)
1066{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001067 Py_ssize_t i;
1068 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069
1070 /* check if a tag contains an xpath character */
1071
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001072#define PATHCHAR(ch) \
1073 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001076 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1077 void *data = PyUnicode_DATA(tag);
1078 unsigned int kind = PyUnicode_KIND(tag);
1079 for (i = 0; i < len; i++) {
1080 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1081 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001082 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001083 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 return 1;
1087 }
1088 return 0;
1089 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001090 if (PyBytes_Check(tag)) {
1091 char *p = PyBytes_AS_STRING(tag);
1092 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 if (p[i] == '{')
1094 check = 0;
1095 else if (p[i] == '}')
1096 check = 1;
1097 else if (check && PATHCHAR(p[i]))
1098 return 1;
1099 }
1100 return 0;
1101 }
1102
1103 return 1; /* unknown type; might be path expression */
1104}
1105
Serhiy Storchakacb985562015-05-04 15:32:48 +03001106/*[clinic input]
1107_elementtree.Element.extend
1108
1109 elements: object
1110 /
1111
1112[clinic start generated code]*/
1113
1114static PyObject *
1115_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1116/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001117{
1118 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001119 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001120
Serhiy Storchakacb985562015-05-04 15:32:48 +03001121 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122 if (!seq) {
1123 PyErr_Format(
1124 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 );
1127 return NULL;
1128 }
1129
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001130 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001131 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001132 Py_INCREF(element);
1133 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001134 PyErr_Format(
1135 PyExc_TypeError,
1136 "expected an Element, not \"%.200s\"",
1137 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001138 Py_DECREF(seq);
1139 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001140 return NULL;
1141 }
1142
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001143 if (element_add_subelement(self, element) < 0) {
1144 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001145 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001146 return NULL;
1147 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001148 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 }
1150
1151 Py_DECREF(seq);
1152
1153 Py_RETURN_NONE;
1154}
1155
Serhiy Storchakacb985562015-05-04 15:32:48 +03001156/*[clinic input]
1157_elementtree.Element.find
1158
1159 path: object
1160 namespaces: object = None
1161
1162[clinic start generated code]*/
1163
1164static PyObject *
1165_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1166 PyObject *namespaces)
1167/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001169 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001170 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001171
Serhiy Storchakacb985562015-05-04 15:32:48 +03001172 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001173 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001174 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001175 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001177 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178
1179 if (!self->extra)
1180 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 for (i = 0; i < self->extra->length; i++) {
1183 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001184 int rc;
1185 if (!Element_CheckExact(item))
1186 continue;
1187 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001188 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001189 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001190 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 Py_DECREF(item);
1192 if (rc < 0)
1193 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 }
1195
1196 Py_RETURN_NONE;
1197}
1198
Serhiy Storchakacb985562015-05-04 15:32:48 +03001199/*[clinic input]
1200_elementtree.Element.findtext
1201
1202 path: object
1203 default: object = None
1204 namespaces: object = None
1205
1206[clinic start generated code]*/
1207
1208static PyObject *
1209_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1210 PyObject *default_value,
1211 PyObject *namespaces)
1212/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001214 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001215 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001216 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001217
Serhiy Storchakacb985562015-05-04 15:32:48 +03001218 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001219 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001220 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 );
1222
1223 if (!self->extra) {
1224 Py_INCREF(default_value);
1225 return default_value;
1226 }
1227
1228 for (i = 0; i < self->extra->length; i++) {
1229 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001230 int rc;
1231 if (!Element_CheckExact(item))
1232 continue;
1233 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001234 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001235 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001237 if (text == Py_None) {
1238 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001239 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001240 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001241 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001243 return text;
1244 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 Py_DECREF(item);
1246 if (rc < 0)
1247 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001248 }
1249
1250 Py_INCREF(default_value);
1251 return default_value;
1252}
1253
Serhiy Storchakacb985562015-05-04 15:32:48 +03001254/*[clinic input]
1255_elementtree.Element.findall
1256
1257 path: object
1258 namespaces: object = None
1259
1260[clinic start generated code]*/
1261
1262static PyObject *
1263_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1264 PyObject *namespaces)
1265/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001266{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001267 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001268 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001269 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001270 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001271
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001272 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001273 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001274 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001275 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001276 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278
1279 out = PyList_New(0);
1280 if (!out)
1281 return NULL;
1282
1283 if (!self->extra)
1284 return out;
1285
1286 for (i = 0; i < self->extra->length; i++) {
1287 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001288 int rc;
1289 if (!Element_CheckExact(item))
1290 continue;
1291 Py_INCREF(item);
1292 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1293 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1294 Py_DECREF(item);
1295 Py_DECREF(out);
1296 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001297 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001298 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001299 }
1300
1301 return out;
1302}
1303
Serhiy Storchakacb985562015-05-04 15:32:48 +03001304/*[clinic input]
1305_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001306
Serhiy Storchakacb985562015-05-04 15:32:48 +03001307 path: object
1308 namespaces: object = None
1309
1310[clinic start generated code]*/
1311
1312static PyObject *
1313_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1314 PyObject *namespaces)
1315/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1316{
1317 PyObject* tag = path;
1318 _Py_IDENTIFIER(iterfind);
1319 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001320
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001321 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001322 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001323}
1324
Serhiy Storchakacb985562015-05-04 15:32:48 +03001325/*[clinic input]
1326_elementtree.Element.get
1327
1328 key: object
1329 default: object = None
1330
1331[clinic start generated code]*/
1332
1333static PyObject *
1334_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1335 PyObject *default_value)
1336/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001337{
1338 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001339
1340 if (!self->extra || self->extra->attrib == Py_None)
1341 value = default_value;
1342 else {
1343 value = PyDict_GetItem(self->extra->attrib, key);
1344 if (!value)
1345 value = default_value;
1346 }
1347
1348 Py_INCREF(value);
1349 return value;
1350}
1351
Serhiy Storchakacb985562015-05-04 15:32:48 +03001352/*[clinic input]
1353_elementtree.Element.getchildren
1354
1355[clinic start generated code]*/
1356
1357static PyObject *
1358_elementtree_Element_getchildren_impl(ElementObject *self)
1359/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001360{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001361 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001362 PyObject* list;
1363
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001364 /* FIXME: report as deprecated? */
1365
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366 if (!self->extra)
1367 return PyList_New(0);
1368
1369 list = PyList_New(self->extra->length);
1370 if (!list)
1371 return NULL;
1372
1373 for (i = 0; i < self->extra->length; i++) {
1374 PyObject* item = self->extra->children[i];
1375 Py_INCREF(item);
1376 PyList_SET_ITEM(list, i, item);
1377 }
1378
1379 return list;
1380}
1381
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001382
Eli Bendersky64d11e62012-06-15 07:42:50 +03001383static PyObject *
1384create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1385
1386
Serhiy Storchakacb985562015-05-04 15:32:48 +03001387/*[clinic input]
1388_elementtree.Element.iter
1389
1390 tag: object = None
1391
1392[clinic start generated code]*/
1393
Eli Bendersky64d11e62012-06-15 07:42:50 +03001394static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001395_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1396/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001397{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001398 if (PyUnicode_Check(tag)) {
1399 if (PyUnicode_READY(tag) < 0)
1400 return NULL;
1401 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1402 tag = Py_None;
1403 }
1404 else if (PyBytes_Check(tag)) {
1405 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1406 tag = Py_None;
1407 }
1408
Eli Bendersky64d11e62012-06-15 07:42:50 +03001409 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001410}
1411
1412
Serhiy Storchakacb985562015-05-04 15:32:48 +03001413/*[clinic input]
1414_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001415
Serhiy Storchakacb985562015-05-04 15:32:48 +03001416[clinic start generated code]*/
1417
1418static PyObject *
1419_elementtree_Element_itertext_impl(ElementObject *self)
1420/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1421{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001422 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001423}
1424
Eli Bendersky64d11e62012-06-15 07:42:50 +03001425
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001426static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001427element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001428{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001429 ElementObject* self = (ElementObject*) self_;
1430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431 if (!self->extra || index < 0 || index >= self->extra->length) {
1432 PyErr_SetString(
1433 PyExc_IndexError,
1434 "child index out of range"
1435 );
1436 return NULL;
1437 }
1438
1439 Py_INCREF(self->extra->children[index]);
1440 return self->extra->children[index];
1441}
1442
Serhiy Storchakacb985562015-05-04 15:32:48 +03001443/*[clinic input]
1444_elementtree.Element.insert
1445
1446 index: Py_ssize_t
1447 subelement: object(subclass_of='&Element_Type')
1448 /
1449
1450[clinic start generated code]*/
1451
1452static PyObject *
1453_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1454 PyObject *subelement)
1455/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001457 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458
Victor Stinner5f0af232013-07-11 23:01:36 +02001459 if (!self->extra) {
1460 if (create_extra(self, NULL) < 0)
1461 return NULL;
1462 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001463
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001464 if (index < 0) {
1465 index += self->extra->length;
1466 if (index < 0)
1467 index = 0;
1468 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469 if (index > self->extra->length)
1470 index = self->extra->length;
1471
1472 if (element_resize(self, 1) < 0)
1473 return NULL;
1474
1475 for (i = self->extra->length; i > index; i--)
1476 self->extra->children[i] = self->extra->children[i-1];
1477
Serhiy Storchakacb985562015-05-04 15:32:48 +03001478 Py_INCREF(subelement);
1479 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001480
1481 self->extra->length++;
1482
1483 Py_RETURN_NONE;
1484}
1485
Serhiy Storchakacb985562015-05-04 15:32:48 +03001486/*[clinic input]
1487_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488
Serhiy Storchakacb985562015-05-04 15:32:48 +03001489[clinic start generated code]*/
1490
1491static PyObject *
1492_elementtree_Element_items_impl(ElementObject *self)
1493/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1494{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495 if (!self->extra || self->extra->attrib == Py_None)
1496 return PyList_New(0);
1497
1498 return PyDict_Items(self->extra->attrib);
1499}
1500
Serhiy Storchakacb985562015-05-04 15:32:48 +03001501/*[clinic input]
1502_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001503
Serhiy Storchakacb985562015-05-04 15:32:48 +03001504[clinic start generated code]*/
1505
1506static PyObject *
1507_elementtree_Element_keys_impl(ElementObject *self)
1508/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1509{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510 if (!self->extra || self->extra->attrib == Py_None)
1511 return PyList_New(0);
1512
1513 return PyDict_Keys(self->extra->attrib);
1514}
1515
Martin v. Löwis18e16552006-02-15 17:27:45 +00001516static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001517element_length(ElementObject* self)
1518{
1519 if (!self->extra)
1520 return 0;
1521
1522 return self->extra->length;
1523}
1524
Serhiy Storchakacb985562015-05-04 15:32:48 +03001525/*[clinic input]
1526_elementtree.Element.makeelement
1527
1528 tag: object
1529 attrib: object
1530 /
1531
1532[clinic start generated code]*/
1533
1534static PyObject *
1535_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1536 PyObject *attrib)
1537/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001538{
1539 PyObject* elem;
1540
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541 attrib = PyDict_Copy(attrib);
1542 if (!attrib)
1543 return NULL;
1544
Eli Bendersky092af1f2012-03-04 07:14:03 +02001545 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546
1547 Py_DECREF(attrib);
1548
1549 return elem;
1550}
1551
Serhiy Storchakacb985562015-05-04 15:32:48 +03001552/*[clinic input]
1553_elementtree.Element.remove
1554
1555 subelement: object(subclass_of='&Element_Type')
1556 /
1557
1558[clinic start generated code]*/
1559
1560static PyObject *
1561_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1562/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001564 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001565 int rc;
1566 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568 if (!self->extra) {
1569 /* element has no children, so raise exception */
1570 PyErr_SetString(
1571 PyExc_ValueError,
1572 "list.remove(x): x not in list"
1573 );
1574 return NULL;
1575 }
1576
1577 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001578 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001580 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001581 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001582 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001583 if (rc < 0)
1584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585 }
1586
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001587 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001588 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 PyErr_SetString(
1590 PyExc_ValueError,
1591 "list.remove(x): x not in list"
1592 );
1593 return NULL;
1594 }
1595
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001596 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597
1598 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599 for (; i < self->extra->length; i++)
1600 self->extra->children[i] = self->extra->children[i+1];
1601
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001602 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 Py_RETURN_NONE;
1604}
1605
1606static PyObject*
1607element_repr(ElementObject* self)
1608{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001609 int status;
1610
1611 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001612 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001613
1614 status = Py_ReprEnter((PyObject *)self);
1615 if (status == 0) {
1616 PyObject *res;
1617 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1618 Py_ReprLeave((PyObject *)self);
1619 return res;
1620 }
1621 if (status > 0)
1622 PyErr_Format(PyExc_RuntimeError,
1623 "reentrant call inside %s.__repr__",
1624 Py_TYPE(self)->tp_name);
1625 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626}
1627
Serhiy Storchakacb985562015-05-04 15:32:48 +03001628/*[clinic input]
1629_elementtree.Element.set
1630
1631 key: object
1632 value: object
1633 /
1634
1635[clinic start generated code]*/
1636
1637static PyObject *
1638_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1639 PyObject *value)
1640/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641{
1642 PyObject* attrib;
1643
Victor Stinner5f0af232013-07-11 23:01:36 +02001644 if (!self->extra) {
1645 if (create_extra(self, NULL) < 0)
1646 return NULL;
1647 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001648
1649 attrib = element_get_attrib(self);
1650 if (!attrib)
1651 return NULL;
1652
1653 if (PyDict_SetItem(attrib, key, value) < 0)
1654 return NULL;
1655
1656 Py_RETURN_NONE;
1657}
1658
1659static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001660element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001661{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001662 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001663 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001664 PyObject* old;
1665
1666 if (!self->extra || index < 0 || index >= self->extra->length) {
1667 PyErr_SetString(
1668 PyExc_IndexError,
1669 "child assignment index out of range");
1670 return -1;
1671 }
1672
1673 old = self->extra->children[index];
1674
1675 if (item) {
1676 Py_INCREF(item);
1677 self->extra->children[index] = item;
1678 } else {
1679 self->extra->length--;
1680 for (i = index; i < self->extra->length; i++)
1681 self->extra->children[i] = self->extra->children[i+1];
1682 }
1683
1684 Py_DECREF(old);
1685
1686 return 0;
1687}
1688
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001689static PyObject*
1690element_subscr(PyObject* self_, PyObject* item)
1691{
1692 ElementObject* self = (ElementObject*) self_;
1693
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001694 if (PyIndex_Check(item)) {
1695 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001696
1697 if (i == -1 && PyErr_Occurred()) {
1698 return NULL;
1699 }
1700 if (i < 0 && self->extra)
1701 i += self->extra->length;
1702 return element_getitem(self_, i);
1703 }
1704 else if (PySlice_Check(item)) {
1705 Py_ssize_t start, stop, step, slicelen, cur, i;
1706 PyObject* list;
1707
1708 if (!self->extra)
1709 return PyList_New(0);
1710
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001711 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001712 self->extra->length,
1713 &start, &stop, &step, &slicelen) < 0) {
1714 return NULL;
1715 }
1716
1717 if (slicelen <= 0)
1718 return PyList_New(0);
1719 else {
1720 list = PyList_New(slicelen);
1721 if (!list)
1722 return NULL;
1723
1724 for (cur = start, i = 0; i < slicelen;
1725 cur += step, i++) {
1726 PyObject* item = self->extra->children[cur];
1727 Py_INCREF(item);
1728 PyList_SET_ITEM(list, i, item);
1729 }
1730
1731 return list;
1732 }
1733 }
1734 else {
1735 PyErr_SetString(PyExc_TypeError,
1736 "element indices must be integers");
1737 return NULL;
1738 }
1739}
1740
1741static int
1742element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1743{
1744 ElementObject* self = (ElementObject*) self_;
1745
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746 if (PyIndex_Check(item)) {
1747 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748
1749 if (i == -1 && PyErr_Occurred()) {
1750 return -1;
1751 }
1752 if (i < 0 && self->extra)
1753 i += self->extra->length;
1754 return element_setitem(self_, i, value);
1755 }
1756 else if (PySlice_Check(item)) {
1757 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1758
1759 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001760 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001761
Victor Stinner5f0af232013-07-11 23:01:36 +02001762 if (!self->extra) {
1763 if (create_extra(self, NULL) < 0)
1764 return -1;
1765 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001766
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001767 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001768 self->extra->length,
1769 &start, &stop, &step, &slicelen) < 0) {
1770 return -1;
1771 }
1772
Eli Bendersky865756a2012-03-09 13:38:15 +02001773 if (value == NULL) {
1774 /* Delete slice */
1775 size_t cur;
1776 Py_ssize_t i;
1777
1778 if (slicelen <= 0)
1779 return 0;
1780
1781 /* Since we're deleting, the direction of the range doesn't matter,
1782 * so for simplicity make it always ascending.
1783 */
1784 if (step < 0) {
1785 stop = start + 1;
1786 start = stop + step * (slicelen - 1) - 1;
1787 step = -step;
1788 }
1789
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001790 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001791
1792 /* recycle is a list that will contain all the children
1793 * scheduled for removal.
1794 */
1795 if (!(recycle = PyList_New(slicelen))) {
1796 PyErr_NoMemory();
1797 return -1;
1798 }
1799
1800 /* This loop walks over all the children that have to be deleted,
1801 * with cur pointing at them. num_moved is the amount of children
1802 * until the next deleted child that have to be "shifted down" to
1803 * occupy the deleted's places.
1804 * Note that in the ith iteration, shifting is done i+i places down
1805 * because i children were already removed.
1806 */
1807 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1808 /* Compute how many children have to be moved, clipping at the
1809 * list end.
1810 */
1811 Py_ssize_t num_moved = step - 1;
1812 if (cur + step >= (size_t)self->extra->length) {
1813 num_moved = self->extra->length - cur - 1;
1814 }
1815
1816 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1817
1818 memmove(
1819 self->extra->children + cur - i,
1820 self->extra->children + cur + 1,
1821 num_moved * sizeof(PyObject *));
1822 }
1823
1824 /* Leftover "tail" after the last removed child */
1825 cur = start + (size_t)slicelen * step;
1826 if (cur < (size_t)self->extra->length) {
1827 memmove(
1828 self->extra->children + cur - slicelen,
1829 self->extra->children + cur,
1830 (self->extra->length - cur) * sizeof(PyObject *));
1831 }
1832
1833 self->extra->length -= slicelen;
1834
1835 /* Discard the recycle list with all the deleted sub-elements */
1836 Py_XDECREF(recycle);
1837 return 0;
1838 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001839
1840 /* A new slice is actually being assigned */
1841 seq = PySequence_Fast(value, "");
1842 if (!seq) {
1843 PyErr_Format(
1844 PyExc_TypeError,
1845 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1846 );
1847 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001848 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001849 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001850
1851 if (step != 1 && newlen != slicelen)
1852 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001853 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001854 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 "attempt to assign sequence of size %zd "
1856 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001857 newlen, slicelen
1858 );
1859 return -1;
1860 }
1861
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001862 /* Resize before creating the recycle bin, to prevent refleaks. */
1863 if (newlen > slicelen) {
1864 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001865 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001866 return -1;
1867 }
1868 }
1869
1870 if (slicelen > 0) {
1871 /* to avoid recursive calls to this method (via decref), move
1872 old items to the recycle bin here, and get rid of them when
1873 we're done modifying the element */
1874 recycle = PyList_New(slicelen);
1875 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001876 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001877 return -1;
1878 }
1879 for (cur = start, i = 0; i < slicelen;
1880 cur += step, i++)
1881 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1882 }
1883
1884 if (newlen < slicelen) {
1885 /* delete slice */
1886 for (i = stop; i < self->extra->length; i++)
1887 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1888 } else if (newlen > slicelen) {
1889 /* insert slice */
1890 for (i = self->extra->length-1; i >= stop; i--)
1891 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1892 }
1893
1894 /* replace the slice */
1895 for (cur = start, i = 0; i < newlen;
1896 cur += step, i++) {
1897 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1898 Py_INCREF(element);
1899 self->extra->children[cur] = element;
1900 }
1901
1902 self->extra->length += newlen - slicelen;
1903
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001904 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001905
1906 /* discard the recycle bin, and everything in it */
1907 Py_XDECREF(recycle);
1908
1909 return 0;
1910 }
1911 else {
1912 PyErr_SetString(PyExc_TypeError,
1913 "element indices must be integers");
1914 return -1;
1915 }
1916}
1917
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001918static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001919element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001920{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001921 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001922 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001923 return res;
1924}
1925
Serhiy Storchakadde08152015-11-25 15:28:13 +02001926static PyObject*
1927element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001928{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001929 PyObject *res = element_get_text(self);
1930 Py_XINCREF(res);
1931 return res;
1932}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001933
Serhiy Storchakadde08152015-11-25 15:28:13 +02001934static PyObject*
1935element_tail_getter(ElementObject *self, void *closure)
1936{
1937 PyObject *res = element_get_tail(self);
1938 Py_XINCREF(res);
1939 return res;
1940}
1941
1942static PyObject*
1943element_attrib_getter(ElementObject *self, void *closure)
1944{
1945 PyObject *res;
1946 if (!self->extra) {
1947 if (create_extra(self, NULL) < 0)
1948 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001949 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001950 res = element_get_attrib(self);
1951 Py_XINCREF(res);
1952 return res;
1953}
Victor Stinner4d463432013-07-11 23:05:03 +02001954
/* Setter validation: a NULL value means "delete the attribute", which is
   not supported.  Sets AttributeError and makes the *enclosing function*
   return -1.  Wrapped in do { } while (0) so the macro behaves as a single
   statement and must be followed by a semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1963
Serhiy Storchakadde08152015-11-25 15:28:13 +02001964static int
1965element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1966{
1967 _VALIDATE_ATTR_VALUE(value);
1968 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001969 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001970 return 0;
1971}
1972
1973static int
1974element_text_setter(ElementObject *self, PyObject *value, void *closure)
1975{
1976 _VALIDATE_ATTR_VALUE(value);
1977 Py_INCREF(value);
1978 Py_DECREF(JOIN_OBJ(self->text));
1979 self->text = value;
1980 return 0;
1981}
1982
1983static int
1984element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1985{
1986 _VALIDATE_ATTR_VALUE(value);
1987 Py_INCREF(value);
1988 Py_DECREF(JOIN_OBJ(self->tail));
1989 self->tail = value;
1990 return 0;
1991}
1992
1993static int
1994element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1995{
1996 _VALIDATE_ATTR_VALUE(value);
1997 if (!self->extra) {
1998 if (create_extra(self, NULL) < 0)
1999 return -1;
2000 }
2001 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002002 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002003 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002004}
2005
2006static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002008 0, /* sq_concat */
2009 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002010 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002011 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002012 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002013 0,
2014};
2015
Eli Bendersky64d11e62012-06-15 07:42:50 +03002016/******************************* Element iterator ****************************/
2017
2018/* ElementIterObject represents the iteration state over an XML element in
2019 * pre-order traversal. To keep track of which sub-element should be returned
2020 * next, a stack of parents is maintained. This is a standard stack-based
2021 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002022 * The stack is managed using a continuous array.
2023 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002024 * the current one is exhausted, and the next child to examine in that parent.
2025 */
2026typedef struct ParentLocator_t {
2027 ElementObject *parent;
2028 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002029} ParentLocator;
2030
2031typedef struct {
2032 PyObject_HEAD
2033 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002034 Py_ssize_t parent_stack_used;
2035 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002036 ElementObject *root_element;
2037 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002038 int gettext;
2039} ElementIterObject;
2040
2041
2042static void
2043elementiter_dealloc(ElementIterObject *it)
2044{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002045 Py_ssize_t i = it->parent_stack_used;
2046 it->parent_stack_used = 0;
2047 while (i--)
2048 Py_XDECREF(it->parent_stack[i].parent);
2049 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002050
2051 Py_XDECREF(it->sought_tag);
2052 Py_XDECREF(it->root_element);
2053
2054 PyObject_GC_UnTrack(it);
2055 PyObject_GC_Del(it);
2056}
2057
2058static int
2059elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2060{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002061 Py_ssize_t i = it->parent_stack_used;
2062 while (i--)
2063 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002064
2065 Py_VISIT(it->root_element);
2066 Py_VISIT(it->sought_tag);
2067 return 0;
2068}
2069
2070/* Helper function for elementiter_next. Add a new parent to the parent stack.
2071 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002072static int
2073parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002074{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002075 ParentLocator *item;
2076
2077 if (it->parent_stack_used >= it->parent_stack_size) {
2078 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2079 ParentLocator *parent_stack = it->parent_stack;
2080 PyMem_Resize(parent_stack, ParentLocator, new_size);
2081 if (parent_stack == NULL)
2082 return -1;
2083 it->parent_stack = parent_stack;
2084 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002085 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002086 item = it->parent_stack + it->parent_stack_used++;
2087 Py_INCREF(parent);
2088 item->parent = parent;
2089 item->child_index = 0;
2090 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002091}
2092
2093static PyObject *
2094elementiter_next(ElementIterObject *it)
2095{
2096 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002097 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098 * A short note on gettext: this function serves both the iter() and
2099 * itertext() methods to avoid code duplication. However, there are a few
2100 * small differences in the way these iterations work. Namely:
2101 * - itertext() only yields text from nodes that have it, and continues
2102 * iterating when a node doesn't have text (so it doesn't return any
2103 * node like iter())
2104 * - itertext() also has to handle tail, after finishing with all the
2105 * children of a node.
2106 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002107 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002108 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002109 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002110
2111 while (1) {
2112 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002113 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114 * iterator is exhausted.
2115 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002116 if (!it->parent_stack_used) {
2117 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118 PyErr_SetNone(PyExc_StopIteration);
2119 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002120 }
2121
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002122 elem = it->root_element; /* steals a reference */
2123 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002124 }
2125 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002126 /* See if there are children left to traverse in the current parent. If
2127 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002128 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002129 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2130 Py_ssize_t child_index = item->child_index;
2131 ElementObjectExtra *extra;
2132 elem = item->parent;
2133 extra = elem->extra;
2134 if (!extra || child_index >= extra->length) {
2135 it->parent_stack_used--;
2136 /* Note that extra condition on it->parent_stack_used here;
2137 * this is because itertext() is supposed to only return *inner*
2138 * text, not text following the element it began iteration with.
2139 */
2140 if (it->gettext && it->parent_stack_used) {
2141 text = element_get_tail(elem);
2142 goto gettext;
2143 }
2144 Py_DECREF(elem);
2145 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002146 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002147
2148 elem = (ElementObject *)extra->children[child_index];
2149 item->child_index++;
2150 Py_INCREF(elem);
2151 }
2152
2153 if (parent_stack_push_new(it, elem) < 0) {
2154 Py_DECREF(elem);
2155 PyErr_NoMemory();
2156 return NULL;
2157 }
2158 if (it->gettext) {
2159 text = element_get_text(elem);
2160 goto gettext;
2161 }
2162
2163 if (it->sought_tag == Py_None)
2164 return (PyObject *)elem;
2165
2166 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2167 if (rc > 0)
2168 return (PyObject *)elem;
2169
2170 Py_DECREF(elem);
2171 if (rc < 0)
2172 return NULL;
2173 continue;
2174
2175gettext:
2176 if (!text) {
2177 Py_DECREF(elem);
2178 return NULL;
2179 }
2180 if (text == Py_None) {
2181 Py_DECREF(elem);
2182 }
2183 else {
2184 Py_INCREF(text);
2185 Py_DECREF(elem);
2186 rc = PyObject_IsTrue(text);
2187 if (rc > 0)
2188 return text;
2189 Py_DECREF(text);
2190 if (rc < 0)
2191 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002192 }
2193 }
2194
2195 return NULL;
2196}
2197
2198
2199static PyTypeObject ElementIter_Type = {
2200 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002201 /* Using the module's name since the pure-Python implementation does not
2202 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002203 "_elementtree._element_iterator", /* tp_name */
2204 sizeof(ElementIterObject), /* tp_basicsize */
2205 0, /* tp_itemsize */
2206 /* methods */
2207 (destructor)elementiter_dealloc, /* tp_dealloc */
2208 0, /* tp_print */
2209 0, /* tp_getattr */
2210 0, /* tp_setattr */
2211 0, /* tp_reserved */
2212 0, /* tp_repr */
2213 0, /* tp_as_number */
2214 0, /* tp_as_sequence */
2215 0, /* tp_as_mapping */
2216 0, /* tp_hash */
2217 0, /* tp_call */
2218 0, /* tp_str */
2219 0, /* tp_getattro */
2220 0, /* tp_setattro */
2221 0, /* tp_as_buffer */
2222 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2223 0, /* tp_doc */
2224 (traverseproc)elementiter_traverse, /* tp_traverse */
2225 0, /* tp_clear */
2226 0, /* tp_richcompare */
2227 0, /* tp_weaklistoffset */
2228 PyObject_SelfIter, /* tp_iter */
2229 (iternextfunc)elementiter_next, /* tp_iternext */
2230 0, /* tp_methods */
2231 0, /* tp_members */
2232 0, /* tp_getset */
2233 0, /* tp_base */
2234 0, /* tp_dict */
2235 0, /* tp_descr_get */
2236 0, /* tp_descr_set */
2237 0, /* tp_dictoffset */
2238 0, /* tp_init */
2239 0, /* tp_alloc */
2240 0, /* tp_new */
2241};
2242
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002243#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002244
2245static PyObject *
2246create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2247{
2248 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002249
2250 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2251 if (!it)
2252 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002253
Victor Stinner4d463432013-07-11 23:05:03 +02002254 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002255 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002256 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002257 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002258 it->root_element = self;
2259
Eli Bendersky64d11e62012-06-15 07:42:50 +03002260 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002261
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002262 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002263 if (it->parent_stack == NULL) {
2264 Py_DECREF(it);
2265 PyErr_NoMemory();
2266 return NULL;
2267 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002268 it->parent_stack_used = 0;
2269 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002270
Eli Bendersky64d11e62012-06-15 07:42:50 +03002271 return (PyObject *)it;
2272}
2273
2274
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002275/* ==================================================================== */
2276/* the tree builder type */
2277
2278typedef struct {
2279 PyObject_HEAD
2280
Eli Bendersky58d548d2012-05-29 15:45:16 +03002281 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282
Antoine Pitrouee329312012-10-04 19:53:29 +02002283 PyObject *this; /* current node */
2284 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
Eli Bendersky58d548d2012-05-29 15:45:16 +03002286 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287
Eli Bendersky58d548d2012-05-29 15:45:16 +03002288 PyObject *stack; /* element stack */
2289 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290
Eli Bendersky48d358b2012-05-30 17:57:50 +03002291 PyObject *element_factory;
2292
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002293 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002294 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002295 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2296 PyObject *end_event_obj;
2297 PyObject *start_ns_event_obj;
2298 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002299} TreeBuilderObject;
2300
Christian Heimes90aa7642007-12-19 02:45:37 +00002301#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002302
2303/* -------------------------------------------------------------------- */
2304/* constructor and destructor */
2305
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306static PyObject *
2307treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002308{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002309 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2310 if (t != NULL) {
2311 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002312
Eli Bendersky58d548d2012-05-29 15:45:16 +03002313 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002314 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002315 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002316 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002317
Eli Bendersky58d548d2012-05-29 15:45:16 +03002318 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002319 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002320 t->stack = PyList_New(20);
2321 if (!t->stack) {
2322 Py_DECREF(t->this);
2323 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002324 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002325 return NULL;
2326 }
2327 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002329 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002330 t->start_event_obj = t->end_event_obj = NULL;
2331 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2332 }
2333 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334}
2335
Serhiy Storchakacb985562015-05-04 15:32:48 +03002336/*[clinic input]
2337_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002338
Serhiy Storchakacb985562015-05-04 15:32:48 +03002339 element_factory: object = NULL
2340
2341[clinic start generated code]*/
2342
2343static int
2344_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2345 PyObject *element_factory)
2346/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2347{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002348 if (element_factory) {
2349 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002350 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002351 }
2352
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354}
2355
Eli Bendersky48d358b2012-05-30 17:57:50 +03002356static int
2357treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2358{
2359 Py_VISIT(self->root);
2360 Py_VISIT(self->this);
2361 Py_VISIT(self->last);
2362 Py_VISIT(self->data);
2363 Py_VISIT(self->stack);
2364 Py_VISIT(self->element_factory);
2365 return 0;
2366}
2367
2368static int
2369treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002371 Py_CLEAR(self->end_ns_event_obj);
2372 Py_CLEAR(self->start_ns_event_obj);
2373 Py_CLEAR(self->end_event_obj);
2374 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002375 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002376 Py_CLEAR(self->stack);
2377 Py_CLEAR(self->data);
2378 Py_CLEAR(self->last);
2379 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002380 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002381 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 return 0;
2383}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384
Eli Bendersky48d358b2012-05-30 17:57:50 +03002385static void
2386treebuilder_dealloc(TreeBuilderObject *self)
2387{
2388 PyObject_GC_UnTrack(self);
2389 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002390 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002391}
2392
2393/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002394/* helpers for handling of arbitrary element-like objects */
2395
2396static int
2397treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2398 PyObject **dest, _Py_Identifier *name)
2399{
2400 if (Element_CheckExact(element)) {
2401 Py_DECREF(JOIN_OBJ(*dest));
2402 *dest = JOIN_SET(data, PyList_CheckExact(data));
2403 return 0;
2404 }
2405 else {
2406 PyObject *joined = list_join(data);
2407 int r;
2408 if (joined == NULL)
2409 return -1;
2410 r = _PyObject_SetAttrId(element, name, joined);
2411 Py_DECREF(joined);
2412 return r;
2413 }
2414}
2415
2416/* These two functions steal a reference to data */
2417static int
2418treebuilder_set_element_text(PyObject *element, PyObject *data)
2419{
2420 _Py_IDENTIFIER(text);
2421 return treebuilder_set_element_text_or_tail(
2422 element, data, &((ElementObject *) element)->text, &PyId_text);
2423}
2424
2425static int
2426treebuilder_set_element_tail(PyObject *element, PyObject *data)
2427{
2428 _Py_IDENTIFIER(tail);
2429 return treebuilder_set_element_text_or_tail(
2430 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2431}
2432
2433static int
2434treebuilder_add_subelement(PyObject *element, PyObject *child)
2435{
2436 _Py_IDENTIFIER(append);
2437 if (Element_CheckExact(element)) {
2438 ElementObject *elem = (ElementObject *) element;
2439 return element_add_subelement(elem, child);
2440 }
2441 else {
2442 PyObject *res;
2443 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2444 if (res == NULL)
2445 return -1;
2446 Py_DECREF(res);
2447 return 0;
2448 }
2449}
2450
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002451LOCAL(int)
2452treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2453 PyObject *node)
2454{
2455 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002456 PyObject *res;
2457 PyObject *event = PyTuple_Pack(2, action, node);
2458 if (event == NULL)
2459 return -1;
2460 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2461 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002462 if (res == NULL)
2463 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002464 Py_DECREF(res);
2465 }
2466 return 0;
2467}
2468
Antoine Pitrouee329312012-10-04 19:53:29 +02002469/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470/* handlers */
2471
2472LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002473treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2474 PyObject* attrib)
2475{
2476 PyObject* node;
2477 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002478 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479
2480 if (self->data) {
2481 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002482 if (treebuilder_set_element_text(self->last, self->data))
2483 return NULL;
2484 }
2485 else {
2486 if (treebuilder_set_element_tail(self->last, self->data))
2487 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488 }
2489 self->data = NULL;
2490 }
2491
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002492 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002493 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002494 } else if (attrib == Py_None) {
2495 attrib = PyDict_New();
2496 if (!attrib)
2497 return NULL;
2498 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2499 Py_DECREF(attrib);
2500 }
2501 else {
2502 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002503 }
2504 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002506 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507
Antoine Pitrouee329312012-10-04 19:53:29 +02002508 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509
2510 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002511 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002512 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 } else {
2514 if (self->root) {
2515 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002516 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 "multiple elements on top level"
2518 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002519 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520 }
2521 Py_INCREF(node);
2522 self->root = node;
2523 }
2524
2525 if (self->index < PyList_GET_SIZE(self->stack)) {
2526 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002527 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528 Py_INCREF(this);
2529 } else {
2530 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002531 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 }
2533 self->index++;
2534
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002536 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002538 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002540 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2541 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542
2543 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002544
2545 error:
2546 Py_DECREF(node);
2547 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548}
2549
2550LOCAL(PyObject*)
2551treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2552{
2553 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002554 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002555 /* ignore calls to data before the first call to start */
2556 Py_RETURN_NONE;
2557 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 /* store the first item as is */
2559 Py_INCREF(data); self->data = data;
2560 } else {
2561 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002562 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2563 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002564 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002565 /* expat often generates single character data sections; handle
2566 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002567 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2568 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002569 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002570 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 } else if (PyList_CheckExact(self->data)) {
2572 if (PyList_Append(self->data, data) < 0)
2573 return NULL;
2574 } else {
2575 PyObject* list = PyList_New(2);
2576 if (!list)
2577 return NULL;
2578 PyList_SET_ITEM(list, 0, self->data);
2579 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2580 self->data = list;
2581 }
2582 }
2583
2584 Py_RETURN_NONE;
2585}
2586
2587LOCAL(PyObject*)
2588treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2589{
2590 PyObject* item;
2591
2592 if (self->data) {
2593 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002594 if (treebuilder_set_element_text(self->last, self->data))
2595 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002597 if (treebuilder_set_element_tail(self->last, self->data))
2598 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002599 }
2600 self->data = NULL;
2601 }
2602
2603 if (self->index == 0) {
2604 PyErr_SetString(
2605 PyExc_IndexError,
2606 "pop from empty stack"
2607 );
2608 return NULL;
2609 }
2610
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002611 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002612 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002613 self->index--;
2614 self->this = PyList_GET_ITEM(self->stack, self->index);
2615 Py_INCREF(self->this);
2616 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002618 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2619 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002620
2621 Py_INCREF(self->last);
2622 return (PyObject*) self->last;
2623}
2624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625/* -------------------------------------------------------------------- */
2626/* methods (in alphabetical order) */
2627
Serhiy Storchakacb985562015-05-04 15:32:48 +03002628/*[clinic input]
2629_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630
Serhiy Storchakacb985562015-05-04 15:32:48 +03002631 data: object
2632 /
2633
2634[clinic start generated code]*/
2635
2636static PyObject *
2637_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2638/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2639{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640 return treebuilder_handle_data(self, data);
2641}
2642
Serhiy Storchakacb985562015-05-04 15:32:48 +03002643/*[clinic input]
2644_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002645
Serhiy Storchakacb985562015-05-04 15:32:48 +03002646 tag: object
2647 /
2648
2649[clinic start generated code]*/
2650
2651static PyObject *
2652_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2653/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2654{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002655 return treebuilder_handle_end(self, tag);
2656}
2657
2658LOCAL(PyObject*)
2659treebuilder_done(TreeBuilderObject* self)
2660{
2661 PyObject* res;
2662
2663 /* FIXME: check stack size? */
2664
2665 if (self->root)
2666 res = self->root;
2667 else
2668 res = Py_None;
2669
2670 Py_INCREF(res);
2671 return res;
2672}
2673
Serhiy Storchakacb985562015-05-04 15:32:48 +03002674/*[clinic input]
2675_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676
Serhiy Storchakacb985562015-05-04 15:32:48 +03002677[clinic start generated code]*/
2678
2679static PyObject *
2680_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2681/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2682{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683 return treebuilder_done(self);
2684}
2685
Serhiy Storchakacb985562015-05-04 15:32:48 +03002686/*[clinic input]
2687_elementtree.TreeBuilder.start
2688
2689 tag: object
2690 attrs: object = None
2691 /
2692
2693[clinic start generated code]*/
2694
2695static PyObject *
2696_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2697 PyObject *attrs)
2698/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002699{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002700 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701}
2702
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703/* ==================================================================== */
2704/* the expat interface */
2705
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002708
2709/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2710 * cached globally without being in per-module state.
2711 */
/* Pointer to the pyexpat dispatch table.  It is only read in this part of
   the file; the assignment is not visible here. */
static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) selects member `func` of the dispatch table, so expat entry
   points are invoked as EXPAT(Name)(args...). */
#define EXPAT(func) (expat_capi->func)

/* Allocator suite handed to ParserCreate_MM so that expat allocates through
   the PyObject_Malloc / PyObject_Realloc / PyObject_Free family. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2717
/* Instance layout of the XMLParser type. */
typedef struct {
    PyObject_HEAD

    /* expat parser handle; created in __init__ and released by
       xmlparser_gc_clear */
    XML_Parser parser;

    /* object receiving parse events; a new TreeBuilder when __init__ is
       given no target */
    PyObject *target;
    /* dict consulted by expat_default_handler to resolve "&name;"
       references */
    PyObject *entity;

    /* dict cache used by makeuniversal: raw UTF-8 name bytes -> decoded
       universal-name str */
    PyObject *names;

    /* Attributes looked up on the target in __init__; each one is NULL when
       the target does not provide that attribute. */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
2739
/* Forward declarations: expat_start_doctype_handler refers to both of these
   before their definitions appear in the file. */
static PyObject*
_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
                                    PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002745
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746/* helpers */
2747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748LOCAL(PyObject*)
2749makeuniversal(XMLParserObject* self, const char* string)
2750{
2751 /* convert a UTF-8 tag/attribute name from the expat parser
2752 to a universal name string */
2753
Antoine Pitrouc1948842012-10-01 23:40:37 +02002754 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755 PyObject* key;
2756 PyObject* value;
2757
2758 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002759 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760 if (!key)
2761 return NULL;
2762
2763 value = PyDict_GetItem(self->names, key);
2764
2765 if (value) {
2766 Py_INCREF(value);
2767 } else {
2768 /* new name. convert to universal name, and decode as
2769 necessary */
2770
2771 PyObject* tag;
2772 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002773 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774
2775 /* look for namespace separator */
2776 for (i = 0; i < size; i++)
2777 if (string[i] == '}')
2778 break;
2779 if (i != size) {
2780 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002781 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002782 if (tag == NULL) {
2783 Py_DECREF(key);
2784 return NULL;
2785 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002786 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 p[0] = '{';
2788 memcpy(p+1, string, size);
2789 size++;
2790 } else {
2791 /* plain name; use key as tag */
2792 Py_INCREF(key);
2793 tag = key;
2794 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002796 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002797 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002798 value = PyUnicode_DecodeUTF8(p, size, "strict");
2799 Py_DECREF(tag);
2800 if (!value) {
2801 Py_DECREF(key);
2802 return NULL;
2803 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804
2805 /* add to names dictionary */
2806 if (PyDict_SetItem(self->names, key, value) < 0) {
2807 Py_DECREF(key);
2808 Py_DECREF(value);
2809 return NULL;
2810 }
2811 }
2812
2813 Py_DECREF(key);
2814 return value;
2815}
2816
Eli Bendersky5b77d812012-03-16 08:20:05 +02002817/* Set the ParseError exception with the given parameters.
2818 * If message is not NULL, it's used as the error string. Otherwise, the
2819 * message string is the default for the given error_code.
2820*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002821static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002822expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2823 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002825 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002826 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002827
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002828 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002829 message ? message : EXPAT(ErrorString)(error_code),
2830 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002831 if (errmsg == NULL)
2832 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002833
Eli Bendersky532d03e2013-08-10 08:00:39 -07002834 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002835 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002836 if (!error)
2837 return;
2838
Eli Bendersky5b77d812012-03-16 08:20:05 +02002839 /* Add code and position attributes */
2840 code = PyLong_FromLong((long)error_code);
2841 if (!code) {
2842 Py_DECREF(error);
2843 return;
2844 }
2845 if (PyObject_SetAttrString(error, "code", code) == -1) {
2846 Py_DECREF(error);
2847 Py_DECREF(code);
2848 return;
2849 }
2850 Py_DECREF(code);
2851
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002852 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002853 if (!position) {
2854 Py_DECREF(error);
2855 return;
2856 }
2857 if (PyObject_SetAttrString(error, "position", position) == -1) {
2858 Py_DECREF(error);
2859 Py_DECREF(position);
2860 return;
2861 }
2862 Py_DECREF(position);
2863
Eli Bendersky532d03e2013-08-10 08:00:39 -07002864 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002865 Py_DECREF(error);
2866}
2867
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002868/* -------------------------------------------------------------------- */
2869/* handlers */
2870
2871static void
2872expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2873 int data_len)
2874{
2875 PyObject* key;
2876 PyObject* value;
2877 PyObject* res;
2878
2879 if (data_len < 2 || data_in[0] != '&')
2880 return;
2881
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002882 if (PyErr_Occurred())
2883 return;
2884
Neal Norwitz0269b912007-08-08 06:56:02 +00002885 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002886 if (!key)
2887 return;
2888
2889 value = PyDict_GetItem(self->entity, key);
2890
2891 if (value) {
2892 if (TreeBuilder_CheckExact(self->target))
2893 res = treebuilder_handle_data(
2894 (TreeBuilderObject*) self->target, value
2895 );
2896 else if (self->handle_data)
2897 res = PyObject_CallFunction(self->handle_data, "O", value);
2898 else
2899 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002900 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002901 } else if (!PyErr_Occurred()) {
2902 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002903 char message[128] = "undefined entity ";
2904 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002905 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002906 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002908 EXPAT(GetErrorColumnNumber)(self->parser),
2909 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910 );
2911 }
2912
2913 Py_DECREF(key);
2914}
2915
2916static void
2917expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2918 const XML_Char **attrib_in)
2919{
2920 PyObject* res;
2921 PyObject* tag;
2922 PyObject* attrib;
2923 int ok;
2924
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002925 if (PyErr_Occurred())
2926 return;
2927
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928 /* tag name */
2929 tag = makeuniversal(self, tag_in);
2930 if (!tag)
2931 return; /* parser will look for errors */
2932
2933 /* attributes */
2934 if (attrib_in[0]) {
2935 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002936 if (!attrib) {
2937 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002939 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 while (attrib_in[0] && attrib_in[1]) {
2941 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002942 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 if (!key || !value) {
2944 Py_XDECREF(value);
2945 Py_XDECREF(key);
2946 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002947 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 return;
2949 }
2950 ok = PyDict_SetItem(attrib, key, value);
2951 Py_DECREF(value);
2952 Py_DECREF(key);
2953 if (ok < 0) {
2954 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002955 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 return;
2957 }
2958 attrib_in += 2;
2959 }
2960 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002961 Py_INCREF(Py_None);
2962 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002963 }
2964
2965 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 /* shortcut */
2967 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2968 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002969 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002970 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002971 if (attrib == Py_None) {
2972 Py_DECREF(attrib);
2973 attrib = PyDict_New();
2974 if (!attrib) {
2975 Py_DECREF(tag);
2976 return;
2977 }
2978 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002980 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 res = NULL;
2982
2983 Py_DECREF(tag);
2984 Py_DECREF(attrib);
2985
2986 Py_XDECREF(res);
2987}
2988
2989static void
2990expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2991 int data_len)
2992{
2993 PyObject* data;
2994 PyObject* res;
2995
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002996 if (PyErr_Occurred())
2997 return;
2998
Neal Norwitz0269b912007-08-08 06:56:02 +00002999 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003000 if (!data)
3001 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003002
3003 if (TreeBuilder_CheckExact(self->target))
3004 /* shortcut */
3005 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3006 else if (self->handle_data)
3007 res = PyObject_CallFunction(self->handle_data, "O", data);
3008 else
3009 res = NULL;
3010
3011 Py_DECREF(data);
3012
3013 Py_XDECREF(res);
3014}
3015
3016static void
3017expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3018{
3019 PyObject* tag;
3020 PyObject* res = NULL;
3021
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003022 if (PyErr_Occurred())
3023 return;
3024
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003025 if (TreeBuilder_CheckExact(self->target))
3026 /* shortcut */
3027 /* the standard tree builder doesn't look at the end tag */
3028 res = treebuilder_handle_end(
3029 (TreeBuilderObject*) self->target, Py_None
3030 );
3031 else if (self->handle_end) {
3032 tag = makeuniversal(self, tag_in);
3033 if (tag) {
3034 res = PyObject_CallFunction(self->handle_end, "O", tag);
3035 Py_DECREF(tag);
3036 }
3037 }
3038
3039 Py_XDECREF(res);
3040}
3041
3042static void
3043expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3044 const XML_Char *uri)
3045{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003046 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3047 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003048
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003049 if (PyErr_Occurred())
3050 return;
3051
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003052 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003053 return;
3054
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003055 if (!uri)
3056 uri = "";
3057 if (!prefix)
3058 prefix = "";
3059
3060 parcel = Py_BuildValue("ss", prefix, uri);
3061 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003062 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003063 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3064 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003065}
3066
3067static void
3068expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3069{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003070 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3071
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003072 if (PyErr_Occurred())
3073 return;
3074
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003075 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003076 return;
3077
3078 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079}
3080
3081static void
3082expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3083{
3084 PyObject* comment;
3085 PyObject* res;
3086
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003087 if (PyErr_Occurred())
3088 return;
3089
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003090 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003091 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092 if (comment) {
3093 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3094 Py_XDECREF(res);
3095 Py_DECREF(comment);
3096 }
3097 }
3098}
3099
Eli Bendersky45839902013-01-13 05:14:47 -08003100static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003101expat_start_doctype_handler(XMLParserObject *self,
3102 const XML_Char *doctype_name,
3103 const XML_Char *sysid,
3104 const XML_Char *pubid,
3105 int has_internal_subset)
3106{
3107 PyObject *self_pyobj = (PyObject *)self;
3108 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3109 PyObject *parser_doctype = NULL;
3110 PyObject *res = NULL;
3111
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003112 if (PyErr_Occurred())
3113 return;
3114
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003115 doctype_name_obj = makeuniversal(self, doctype_name);
3116 if (!doctype_name_obj)
3117 return;
3118
3119 if (sysid) {
3120 sysid_obj = makeuniversal(self, sysid);
3121 if (!sysid_obj) {
3122 Py_DECREF(doctype_name_obj);
3123 return;
3124 }
3125 } else {
3126 Py_INCREF(Py_None);
3127 sysid_obj = Py_None;
3128 }
3129
3130 if (pubid) {
3131 pubid_obj = makeuniversal(self, pubid);
3132 if (!pubid_obj) {
3133 Py_DECREF(doctype_name_obj);
3134 Py_DECREF(sysid_obj);
3135 return;
3136 }
3137 } else {
3138 Py_INCREF(Py_None);
3139 pubid_obj = Py_None;
3140 }
3141
3142 /* If the target has a handler for doctype, call it. */
3143 if (self->handle_doctype) {
3144 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3145 doctype_name_obj, pubid_obj, sysid_obj);
3146 Py_CLEAR(res);
3147 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003148 else {
3149 /* Now see if the parser itself has a doctype method. If yes and it's
3150 * a custom method, call it but warn about deprecation. If it's only
3151 * the vanilla XMLParser method, do nothing.
3152 */
3153 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3154 if (parser_doctype &&
3155 !(PyCFunction_Check(parser_doctype) &&
3156 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3157 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003158 (PyCFunction) _elementtree_XMLParser_doctype)) {
3159 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3160 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003161 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003162 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003163 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003164 res = PyObject_CallFunction(parser_doctype, "OOO",
3165 doctype_name_obj, pubid_obj, sysid_obj);
3166 Py_CLEAR(res);
3167 }
3168 }
3169
3170clear:
3171 Py_XDECREF(parser_doctype);
3172 Py_DECREF(doctype_name_obj);
3173 Py_DECREF(pubid_obj);
3174 Py_DECREF(sysid_obj);
3175}
3176
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003177static void
3178expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3179 const XML_Char* data_in)
3180{
3181 PyObject* target;
3182 PyObject* data;
3183 PyObject* res;
3184
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003185 if (PyErr_Occurred())
3186 return;
3187
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003189 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3190 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003191 if (target && data) {
3192 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3193 Py_XDECREF(res);
3194 Py_DECREF(data);
3195 Py_DECREF(target);
3196 } else {
3197 Py_XDECREF(data);
3198 Py_XDECREF(target);
3199 }
3200 }
3201}
3202
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003203/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204
Eli Bendersky52467b12012-06-01 07:13:08 +03003205static PyObject *
3206xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207{
Eli Bendersky52467b12012-06-01 07:13:08 +03003208 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3209 if (self) {
3210 self->parser = NULL;
3211 self->target = self->entity = self->names = NULL;
3212 self->handle_start = self->handle_data = self->handle_end = NULL;
3213 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003214 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003216 return (PyObject *)self;
3217}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218
Serhiy Storchakacb985562015-05-04 15:32:48 +03003219/*[clinic input]
3220_elementtree.XMLParser.__init__
3221
3222 html: object = NULL
3223 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003224 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003225
3226[clinic start generated code]*/
3227
Eli Bendersky52467b12012-06-01 07:13:08 +03003228static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003229_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3230 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003231/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003232{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003233 self->entity = PyDict_New();
3234 if (!self->entity)
3235 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236
Serhiy Storchakacb985562015-05-04 15:32:48 +03003237 self->names = PyDict_New();
3238 if (!self->names) {
3239 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003241 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003242
Serhiy Storchakacb985562015-05-04 15:32:48 +03003243 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3244 if (!self->parser) {
3245 Py_CLEAR(self->entity);
3246 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 }
3250
Eli Bendersky52467b12012-06-01 07:13:08 +03003251 if (target) {
3252 Py_INCREF(target);
3253 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003254 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003256 Py_CLEAR(self->entity);
3257 Py_CLEAR(self->names);
3258 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003259 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003261 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003262 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263
Serhiy Storchakacb985562015-05-04 15:32:48 +03003264 self->handle_start = PyObject_GetAttrString(target, "start");
3265 self->handle_data = PyObject_GetAttrString(target, "data");
3266 self->handle_end = PyObject_GetAttrString(target, "end");
3267 self->handle_comment = PyObject_GetAttrString(target, "comment");
3268 self->handle_pi = PyObject_GetAttrString(target, "pi");
3269 self->handle_close = PyObject_GetAttrString(target, "close");
3270 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271
3272 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003273
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003275 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003277 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278 (XML_StartElementHandler) expat_start_handler,
3279 (XML_EndElementHandler) expat_end_handler
3280 );
3281 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003282 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283 (XML_DefaultHandler) expat_default_handler
3284 );
3285 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003286 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 (XML_CharacterDataHandler) expat_data_handler
3288 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003289 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 (XML_CommentHandler) expat_comment_handler
3293 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003294 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 (XML_ProcessingInstructionHandler) expat_pi_handler
3298 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003299 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003300 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003301 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3302 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003305 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307
Eli Bendersky52467b12012-06-01 07:13:08 +03003308 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309}
3310
Eli Bendersky52467b12012-06-01 07:13:08 +03003311static int
3312xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3313{
3314 Py_VISIT(self->handle_close);
3315 Py_VISIT(self->handle_pi);
3316 Py_VISIT(self->handle_comment);
3317 Py_VISIT(self->handle_end);
3318 Py_VISIT(self->handle_data);
3319 Py_VISIT(self->handle_start);
3320
3321 Py_VISIT(self->target);
3322 Py_VISIT(self->entity);
3323 Py_VISIT(self->names);
3324
3325 return 0;
3326}
3327
3328static int
3329xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330{
3331 EXPAT(ParserFree)(self->parser);
3332
Antoine Pitrouc1948842012-10-01 23:40:37 +02003333 Py_CLEAR(self->handle_close);
3334 Py_CLEAR(self->handle_pi);
3335 Py_CLEAR(self->handle_comment);
3336 Py_CLEAR(self->handle_end);
3337 Py_CLEAR(self->handle_data);
3338 Py_CLEAR(self->handle_start);
3339 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340
Antoine Pitrouc1948842012-10-01 23:40:37 +02003341 Py_CLEAR(self->target);
3342 Py_CLEAR(self->entity);
3343 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344
Eli Bendersky52467b12012-06-01 07:13:08 +03003345 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346}
3347
Eli Bendersky52467b12012-06-01 07:13:08 +03003348static void
3349xmlparser_dealloc(XMLParserObject* self)
3350{
3351 PyObject_GC_UnTrack(self);
3352 xmlparser_gc_clear(self);
3353 Py_TYPE(self)->tp_free((PyObject *)self);
3354}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003355
3356LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003357expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358{
3359 int ok;
3360
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003361 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003362 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3363
3364 if (PyErr_Occurred())
3365 return NULL;
3366
3367 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003368 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003369 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003371 EXPAT(GetErrorColumnNumber)(self->parser),
3372 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373 );
3374 return NULL;
3375 }
3376
3377 Py_RETURN_NONE;
3378}
3379
Serhiy Storchakacb985562015-05-04 15:32:48 +03003380/*[clinic input]
3381_elementtree.XMLParser.close
3382
3383[clinic start generated code]*/
3384
3385static PyObject *
3386_elementtree_XMLParser_close_impl(XMLParserObject *self)
3387/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388{
3389 /* end feeding data to parser */
3390
3391 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003393 if (!res)
3394 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003395
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003396 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 Py_DECREF(res);
3398 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003399 }
3400 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003401 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003402 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003403 }
3404 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003405 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003406 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003407}
3408
Serhiy Storchakacb985562015-05-04 15:32:48 +03003409/*[clinic input]
3410_elementtree.XMLParser.feed
3411
3412 data: object
3413 /
3414
3415[clinic start generated code]*/
3416
3417static PyObject *
3418_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3419/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003420{
3421 /* feed data to parser */
3422
Serhiy Storchakacb985562015-05-04 15:32:48 +03003423 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003424 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003425 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3426 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003427 return NULL;
3428 if (data_len > INT_MAX) {
3429 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3430 return NULL;
3431 }
3432 /* Explicitly set UTF-8 encoding. Return code ignored. */
3433 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003434 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003435 }
3436 else {
3437 Py_buffer view;
3438 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003439 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003440 return NULL;
3441 if (view.len > INT_MAX) {
3442 PyBuffer_Release(&view);
3443 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3444 return NULL;
3445 }
3446 res = expat_parse(self, view.buf, (int)view.len, 0);
3447 PyBuffer_Release(&view);
3448 return res;
3449 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003450}
3451
Serhiy Storchakacb985562015-05-04 15:32:48 +03003452/*[clinic input]
3453_elementtree.XMLParser._parse_whole
3454
3455 file: object
3456 /
3457
3458[clinic start generated code]*/
3459
3460static PyObject *
3461_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3462/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003463{
Eli Benderskya3699232013-05-19 18:47:23 -07003464 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465 PyObject* reader;
3466 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003467 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468 PyObject* res;
3469
Serhiy Storchakacb985562015-05-04 15:32:48 +03003470 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471 if (!reader)
3472 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003473
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474 /* read from open file object */
3475 for (;;) {
3476
3477 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3478
3479 if (!buffer) {
3480 /* read failed (e.g. due to KeyboardInterrupt) */
3481 Py_DECREF(reader);
3482 return NULL;
3483 }
3484
Eli Benderskyf996e772012-03-16 05:53:30 +02003485 if (PyUnicode_CheckExact(buffer)) {
3486 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003487 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003488 Py_DECREF(buffer);
3489 break;
3490 }
3491 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003492 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003493 if (!temp) {
3494 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003495 Py_DECREF(reader);
3496 return NULL;
3497 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003498 buffer = temp;
3499 }
3500 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501 Py_DECREF(buffer);
3502 break;
3503 }
3504
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003505 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3506 Py_DECREF(buffer);
3507 Py_DECREF(reader);
3508 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3509 return NULL;
3510 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003511 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003512 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003513 );
3514
3515 Py_DECREF(buffer);
3516
3517 if (!res) {
3518 Py_DECREF(reader);
3519 return NULL;
3520 }
3521 Py_DECREF(res);
3522
3523 }
3524
3525 Py_DECREF(reader);
3526
3527 res = expat_parse(self, "", 0, 1);
3528
3529 if (res && TreeBuilder_CheckExact(self->target)) {
3530 Py_DECREF(res);
3531 return treebuilder_done((TreeBuilderObject*) self->target);
3532 }
3533
3534 return res;
3535}
3536
Serhiy Storchakacb985562015-05-04 15:32:48 +03003537/*[clinic input]
3538_elementtree.XMLParser.doctype
3539
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003540 name: object
3541 pubid: object
3542 system: object
3543 /
3544
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545[clinic start generated code]*/
3546
3547static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003548_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3549 PyObject *pubid, PyObject *system)
3550/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003551{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003552 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3553 "This method of XMLParser is deprecated. Define"
3554 " doctype() method on the TreeBuilder target.",
3555 1) < 0) {
3556 return NULL;
3557 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003558 Py_RETURN_NONE;
3559}
3560
Serhiy Storchakacb985562015-05-04 15:32:48 +03003561/*[clinic input]
3562_elementtree.XMLParser._setevents
3563
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003564 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003565 events_to_report: object = None
3566 /
3567
3568[clinic start generated code]*/
3569
3570static PyObject *
3571_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3572 PyObject *events_queue,
3573 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003574/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003575{
3576 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003577 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003578 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003579 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580
3581 if (!TreeBuilder_CheckExact(self->target)) {
3582 PyErr_SetString(
3583 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003584 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585 "targets"
3586 );
3587 return NULL;
3588 }
3589
3590 target = (TreeBuilderObject*) self->target;
3591
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003592 events_append = PyObject_GetAttrString(events_queue, "append");
3593 if (events_append == NULL)
3594 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003595 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003596
3597 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003598 Py_CLEAR(target->start_event_obj);
3599 Py_CLEAR(target->end_event_obj);
3600 Py_CLEAR(target->start_ns_event_obj);
3601 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003602
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003603 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003605 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606 Py_RETURN_NONE;
3607 }
3608
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003609 if (!(events_seq = PySequence_Fast(events_to_report,
3610 "events must be a sequence"))) {
3611 return NULL;
3612 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003614 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003615 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003616 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003617 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003618 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003619 } else if (PyBytes_Check(event_name_obj)) {
3620 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003621 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003622 if (event_name == NULL) {
3623 Py_DECREF(events_seq);
3624 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3625 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003626 }
3627
3628 Py_INCREF(event_name_obj);
3629 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003630 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003631 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003632 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003633 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003634 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635 EXPAT(SetNamespaceDeclHandler)(
3636 self->parser,
3637 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3638 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3639 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003640 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003641 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003642 EXPAT(SetNamespaceDeclHandler)(
3643 self->parser,
3644 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3645 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3646 );
3647 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003648 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003649 Py_DECREF(events_seq);
3650 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003651 return NULL;
3652 }
3653 }
3654
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003655 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003656 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657}
3658
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003659static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003660xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003661{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003662 if (PyUnicode_Check(nameobj)) {
3663 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003664 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003665 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003666 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003667 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003668 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003669 return PyUnicode_FromFormat(
3670 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003672 }
3673 else
3674 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675
Alexander Belopolskye239d232010-12-08 23:31:48 +00003676 Py_INCREF(res);
3677 return res;
3678 }
3679 generic:
3680 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681}
3682
Serhiy Storchakacb985562015-05-04 15:32:48 +03003683#include "clinic/_elementtree.c.h"
3684
3685static PyMethodDef element_methods[] = {
3686
3687 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3688
3689 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3690 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3691
3692 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3693 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3694 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3695
3696 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3697 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3698 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3699 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3700
3701 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3702 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3703 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3704
Victor Stinner84d8baa2016-09-29 22:12:35 +02003705 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_FASTCALL, _elementtree_Element_iter__doc__},
Serhiy Storchakacb985562015-05-04 15:32:48 +03003706 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3707
3708 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3709 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3710
3711 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3712
3713 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3714 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3715 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3716 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3717 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3718
3719 {NULL, NULL}
3720};
3721
3722static PyMappingMethods element_as_mapping = {
3723 (lenfunc) element_length,
3724 (binaryfunc) element_subscr,
3725 (objobjargproc) element_ass_subscr,
3726};
3727
Serhiy Storchakadde08152015-11-25 15:28:13 +02003728static PyGetSetDef element_getsetlist[] = {
3729 {"tag",
3730 (getter)element_tag_getter,
3731 (setter)element_tag_setter,
3732 "A string identifying what kind of data this element represents"},
3733 {"text",
3734 (getter)element_text_getter,
3735 (setter)element_text_setter,
3736 "A string of text directly after the start tag, or None"},
3737 {"tail",
3738 (getter)element_tail_getter,
3739 (setter)element_tail_setter,
3740 "A string of text directly after the end tag, or None"},
3741 {"attrib",
3742 (getter)element_attrib_getter,
3743 (setter)element_attrib_setter,
3744 "A dictionary containing the element's attributes"},
3745 {NULL},
3746};
3747
Serhiy Storchakacb985562015-05-04 15:32:48 +03003748static PyTypeObject Element_Type = {
3749 PyVarObject_HEAD_INIT(NULL, 0)
3750 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3751 /* methods */
3752 (destructor)element_dealloc, /* tp_dealloc */
3753 0, /* tp_print */
3754 0, /* tp_getattr */
3755 0, /* tp_setattr */
3756 0, /* tp_reserved */
3757 (reprfunc)element_repr, /* tp_repr */
3758 0, /* tp_as_number */
3759 &element_as_sequence, /* tp_as_sequence */
3760 &element_as_mapping, /* tp_as_mapping */
3761 0, /* tp_hash */
3762 0, /* tp_call */
3763 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003764 PyObject_GenericGetAttr, /* tp_getattro */
3765 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003766 0, /* tp_as_buffer */
3767 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3768 /* tp_flags */
3769 0, /* tp_doc */
3770 (traverseproc)element_gc_traverse, /* tp_traverse */
3771 (inquiry)element_gc_clear, /* tp_clear */
3772 0, /* tp_richcompare */
3773 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3774 0, /* tp_iter */
3775 0, /* tp_iternext */
3776 element_methods, /* tp_methods */
3777 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003778 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003779 0, /* tp_base */
3780 0, /* tp_dict */
3781 0, /* tp_descr_get */
3782 0, /* tp_descr_set */
3783 0, /* tp_dictoffset */
3784 (initproc)element_init, /* tp_init */
3785 PyType_GenericAlloc, /* tp_alloc */
3786 element_new, /* tp_new */
3787 0, /* tp_free */
3788};
3789
3790static PyMethodDef treebuilder_methods[] = {
3791 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3792 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3793 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3794 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3795 {NULL, NULL}
3796};
3797
3798static PyTypeObject TreeBuilder_Type = {
3799 PyVarObject_HEAD_INIT(NULL, 0)
3800 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3801 /* methods */
3802 (destructor)treebuilder_dealloc, /* tp_dealloc */
3803 0, /* tp_print */
3804 0, /* tp_getattr */
3805 0, /* tp_setattr */
3806 0, /* tp_reserved */
3807 0, /* tp_repr */
3808 0, /* tp_as_number */
3809 0, /* tp_as_sequence */
3810 0, /* tp_as_mapping */
3811 0, /* tp_hash */
3812 0, /* tp_call */
3813 0, /* tp_str */
3814 0, /* tp_getattro */
3815 0, /* tp_setattro */
3816 0, /* tp_as_buffer */
3817 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3818 /* tp_flags */
3819 0, /* tp_doc */
3820 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3821 (inquiry)treebuilder_gc_clear, /* tp_clear */
3822 0, /* tp_richcompare */
3823 0, /* tp_weaklistoffset */
3824 0, /* tp_iter */
3825 0, /* tp_iternext */
3826 treebuilder_methods, /* tp_methods */
3827 0, /* tp_members */
3828 0, /* tp_getset */
3829 0, /* tp_base */
3830 0, /* tp_dict */
3831 0, /* tp_descr_get */
3832 0, /* tp_descr_set */
3833 0, /* tp_dictoffset */
3834 _elementtree_TreeBuilder___init__, /* tp_init */
3835 PyType_GenericAlloc, /* tp_alloc */
3836 treebuilder_new, /* tp_new */
3837 0, /* tp_free */
3838};
3839
3840static PyMethodDef xmlparser_methods[] = {
3841 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3842 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3843 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3844 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3845 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3846 {NULL, NULL}
3847};
3848
Neal Norwitz227b5332006-03-22 09:28:35 +00003849static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003850 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003851 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003852 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003853 (destructor)xmlparser_dealloc, /* tp_dealloc */
3854 0, /* tp_print */
3855 0, /* tp_getattr */
3856 0, /* tp_setattr */
3857 0, /* tp_reserved */
3858 0, /* tp_repr */
3859 0, /* tp_as_number */
3860 0, /* tp_as_sequence */
3861 0, /* tp_as_mapping */
3862 0, /* tp_hash */
3863 0, /* tp_call */
3864 0, /* tp_str */
3865 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3866 0, /* tp_setattro */
3867 0, /* tp_as_buffer */
3868 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3869 /* tp_flags */
3870 0, /* tp_doc */
3871 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3872 (inquiry)xmlparser_gc_clear, /* tp_clear */
3873 0, /* tp_richcompare */
3874 0, /* tp_weaklistoffset */
3875 0, /* tp_iter */
3876 0, /* tp_iternext */
3877 xmlparser_methods, /* tp_methods */
3878 0, /* tp_members */
3879 0, /* tp_getset */
3880 0, /* tp_base */
3881 0, /* tp_dict */
3882 0, /* tp_descr_get */
3883 0, /* tp_descr_set */
3884 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003885 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003886 PyType_GenericAlloc, /* tp_alloc */
3887 xmlparser_new, /* tp_new */
3888 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003889};
3890
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003891/* ==================================================================== */
3892/* python module interface */
3893
3894static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003895 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003896 {NULL, NULL}
3897};
3898
Martin v. Löwis1a214512008-06-11 05:26:20 +00003899
Eli Bendersky532d03e2013-08-10 08:00:39 -07003900static struct PyModuleDef elementtreemodule = {
3901 PyModuleDef_HEAD_INIT,
3902 "_elementtree",
3903 NULL,
3904 sizeof(elementtreestate),
3905 _functions,
3906 NULL,
3907 elementtree_traverse,
3908 elementtree_clear,
3909 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003910};
3911
Neal Norwitzf6657e62006-12-28 04:47:50 +00003912PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003913PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003914{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003915 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003916 elementtreestate *st;
3917
3918 m = PyState_FindModule(&elementtreemodule);
3919 if (m) {
3920 Py_INCREF(m);
3921 return m;
3922 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003923
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003924 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003925 if (PyType_Ready(&ElementIter_Type) < 0)
3926 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003927 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003928 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003929 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003930 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003931 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003932 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003933
Eli Bendersky532d03e2013-08-10 08:00:39 -07003934 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003935 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003936 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003937 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003938
Eli Bendersky828efde2012-04-05 05:40:58 +03003939 if (!(temp = PyImport_ImportModule("copy")))
3940 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003941 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003942 Py_XDECREF(temp);
3943
Eli Bendersky532d03e2013-08-10 08:00:39 -07003944 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003945 return NULL;
3946
Eli Bendersky20d41742012-06-01 09:48:37 +03003947 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003948 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3949 if (expat_capi) {
3950 /* check that it's usable */
3951 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003952 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003953 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3954 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003955 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003956 PyErr_SetString(PyExc_ImportError,
3957 "pyexpat version is incompatible");
3958 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003959 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003960 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003961 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003962 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003963
Eli Bendersky532d03e2013-08-10 08:00:39 -07003964 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003965 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003966 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003967 Py_INCREF(st->parseerror_obj);
3968 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003969
Eli Bendersky092af1f2012-03-04 07:14:03 +02003970 Py_INCREF((PyObject *)&Element_Type);
3971 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3972
Eli Bendersky58d548d2012-05-29 15:45:16 +03003973 Py_INCREF((PyObject *)&TreeBuilder_Type);
3974 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3975
Eli Bendersky52467b12012-06-01 07:13:08 +03003976 Py_INCREF((PyObject *)&XMLParser_Type);
3977 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003978
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003979 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003980}