blob: 2cda98e61127d6d4e24422d227d002097072aaac [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* An element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the "#if 0" branch enabled, ALLOC and
   RELEASE keep a running byte count in `memory` and print it together
   with the given comment; in the default branch both macros expand to
   nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) starts the declaration of a file-local
   function returning `type`.  With MSVC it also asks for inlining and the
   __fastcall calling convention; everywhere else it is plain `static`. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)       - the low bit of p (the join flag), 0 or 1
   JOIN_SET(p, flag) - p with its low bit cleared, then OR-ed with flag
   JOIN_OBJ(p)       - p with its low bit cleared, as a PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
/* Types defined by this extension.  Declared up front so that code below
   (for example Element_CheckExact and create_new_element) can refer to
   the type objects. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
81
82
/* Per-module state; PEP 3121.  Each member is an owned object reference
   (or NULL); all three are visited and cleared together. */
typedef struct {
    PyObject *parseerror_obj;
    PyObject *deepcopy_obj;     /* callable invoked as deepcopy_obj(object, memo) */
    PyObject *elementpath_obj;
} elementtreestate;
89
/* Forward declaration; ET_STATE_GLOBAL below needs the address of the
 * module definition.
 */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
/* Separately allocated part of an element holding its attributes and
   children.  It is created on demand by create_extra() and released by
   dealloc_extra(); an element whose `extra` pointer is NULL has none. */
typedef struct {

    /* attributes (a dictionary object), or None if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage for the first STATIC_CHILDREN child slots */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
175
/* Instance layout of an Element. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra() allocates it */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True only when the type of `op` is exactly Element_Type; instances of
   subclasses do not match. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300400 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
511 PyObject* res = self->text;
512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
516 res = list_join(res);
517 if (!res)
518 return NULL;
519 self->text = res;
520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
531 PyObject* res = self->tail;
532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
536 res = list_join(res);
537 if (!res)
538 return NULL;
539 self->tail = res;
540 }
541 }
542
543 return res;
544}
545
546static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300547subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548{
549 PyObject* elem;
550
551 ElementObject* parent;
552 PyObject* tag;
553 PyObject* attrib = NULL;
554 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
555 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800556 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (attrib) {
561 /* attrib passed as positional arg */
562 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 if (!attrib)
564 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300565 if (kwds) {
566 if (PyDict_Update(attrib, kwds) < 0) {
567 return NULL;
568 }
569 }
570 } else if (kwds) {
571 /* have keyword args */
572 attrib = get_attrib_from_keywords(kwds);
573 if (!attrib)
574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300576 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 Py_INCREF(Py_None);
578 attrib = Py_None;
579 }
580
Eli Bendersky092af1f2012-03-04 07:14:03 +0200581 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200583 if (elem == NULL)
584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000586 if (element_add_subelement(parent, elem) < 0) {
587 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590
591 return elem;
592}
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594static int
595element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
596{
597 Py_VISIT(self->tag);
598 Py_VISIT(JOIN_OBJ(self->text));
599 Py_VISIT(JOIN_OBJ(self->tail));
600
601 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200602 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300603 Py_VISIT(self->extra->attrib);
604
605 for (i = 0; i < self->extra->length; ++i)
606 Py_VISIT(self->extra->children[i]);
607 }
608 return 0;
609}
610
611static int
612element_gc_clear(ElementObject *self)
613{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700615 _clear_joined_ptr(&self->text);
616 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300617
618 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300619 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 return 0;
623}
624
/* Destroy an Element: untrack it from the GC, clear weak references,
 * release everything it references and hand the memory back through the
 * type's tp_free.  The teardown is bracketed by the trashcan macros
 * (presumably to bound C recursion when deeply nested trees are freed --
 * confirm against the Py_TRASHCAN_SAFE_BEGIN documentation).
 */
static void
element_dealloc(ElementObject* self)
{
    /* must happen before any reference is dropped */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_SAFE_BEGIN(self)

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    RELEASE(sizeof(ElementObject), "destroy element");
    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_SAFE_END(self)
}
642
643/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
Serhiy Storchakacb985562015-05-04 15:32:48 +0300645/*[clinic input]
646_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647
Serhiy Storchakacb985562015-05-04 15:32:48 +0300648 subelement: object(subclass_of='&Element_Type')
649 /
650
651[clinic start generated code]*/
652
653static PyObject *
654_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
655/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
656{
657 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658 return NULL;
659
660 Py_RETURN_NONE;
661}
662
Serhiy Storchakacb985562015-05-04 15:32:48 +0300663/*[clinic input]
664_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666[clinic start generated code]*/
667
668static PyObject *
669_elementtree_Element_clear_impl(ElementObject *self)
670/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
671{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300672 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
674 Py_INCREF(Py_None);
675 Py_DECREF(JOIN_OBJ(self->text));
676 self->text = Py_None;
677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->tail));
680 self->tail = Py_None;
681
682 Py_RETURN_NONE;
683}
684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685/*[clinic input]
686_elementtree.Element.__copy__
687
688[clinic start generated code]*/
689
690static PyObject *
691_elementtree_Element___copy___impl(ElementObject *self)
692/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200694 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695 ElementObject* element;
696
Eli Bendersky092af1f2012-03-04 07:14:03 +0200697 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800698 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699 if (!element)
700 return NULL;
701
702 Py_DECREF(JOIN_OBJ(element->text));
703 element->text = self->text;
704 Py_INCREF(JOIN_OBJ(element->text));
705
706 Py_DECREF(JOIN_OBJ(element->tail));
707 element->tail = self->tail;
708 Py_INCREF(JOIN_OBJ(element->tail));
709
710 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000711 if (element_resize(element, self->extra->length) < 0) {
712 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 for (i = 0; i < self->extra->length; i++) {
717 Py_INCREF(self->extra->children[i]);
718 element->extra->children[i] = self->extra->children[i];
719 }
720
721 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722 }
723
724 return (PyObject*) element;
725}
726
/* Helper for a deep copy.  Declared ahead of its definition because
   Element.__deepcopy__ below calls it. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
729
Serhiy Storchakacb985562015-05-04 15:32:48 +0300730/*[clinic input]
731_elementtree.Element.__deepcopy__
732
733 memo: object
734 /
735
736[clinic start generated code]*/
737
738static PyObject *
739_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
740/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 ElementObject* element;
744 PyObject* tag;
745 PyObject* attrib;
746 PyObject* text;
747 PyObject* tail;
748 PyObject* id;
749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 tag = deepcopy(self->tag, memo);
751 if (!tag)
752 return NULL;
753
754 if (self->extra) {
755 attrib = deepcopy(self->extra->attrib, memo);
756 if (!attrib) {
757 Py_DECREF(tag);
758 return NULL;
759 }
760 } else {
761 Py_INCREF(Py_None);
762 attrib = Py_None;
763 }
764
Eli Bendersky092af1f2012-03-04 07:14:03 +0200765 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766
767 Py_DECREF(tag);
768 Py_DECREF(attrib);
769
770 if (!element)
771 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 text = deepcopy(JOIN_OBJ(self->text), memo);
774 if (!text)
775 goto error;
776 Py_DECREF(element->text);
777 element->text = JOIN_SET(text, JOIN_GET(self->text));
778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
782 Py_DECREF(element->tail);
783 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
784
785 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000786 if (element_resize(element, self->extra->length) < 0)
787 goto error;
788
789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* child = deepcopy(self->extra->children[i], memo);
791 if (!child) {
792 element->extra->length = i;
793 goto error;
794 }
795 element->extra->children[i] = child;
796 }
797
798 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700802 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200820LOCAL(PyObject *)
821deepcopy(PyObject *object, PyObject *memo)
822{
823 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200824 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200825 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200826
827 /* Fast paths */
828 if (object == Py_None || PyUnicode_CheckExact(object)) {
829 Py_INCREF(object);
830 return object;
831 }
832
833 if (Py_REFCNT(object) == 1) {
834 if (PyDict_CheckExact(object)) {
835 PyObject *key, *value;
836 Py_ssize_t pos = 0;
837 int simple = 1;
838 while (PyDict_Next(object, &pos, &key, &value)) {
839 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
840 simple = 0;
841 break;
842 }
843 }
844 if (simple)
845 return PyDict_Copy(object);
846 /* Fall through to general case */
847 }
848 else if (Element_CheckExact(object)) {
849 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
850 }
851 }
852
853 /* General case */
854 st = ET_STATE_GLOBAL;
855 if (!st->deepcopy_obj) {
856 PyErr_SetString(PyExc_RuntimeError,
857 "deepcopy helper not found");
858 return NULL;
859 }
860
Victor Stinner7fbac452016-08-20 01:34:44 +0200861 stack[0] = object;
862 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200863 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200864}
865
866
Serhiy Storchakacb985562015-05-04 15:32:48 +0300867/*[clinic input]
868_elementtree.Element.__sizeof__ -> Py_ssize_t
869
870[clinic start generated code]*/
871
872static Py_ssize_t
873_elementtree_Element___sizeof___impl(ElementObject *self)
874/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200875{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200876 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877 if (self->extra) {
878 result += sizeof(ElementObjectExtra);
879 if (self->extra->children != self->extra->_children)
880 result += sizeof(PyObject*) * self->extra->allocated;
881 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300882 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200883}
884
/* Dict keys for getstate/setstate: __getstate__ stores the element's fields
 * under these names, and the setstate helper accepts the same names as
 * keyword names when unpacking the state dict. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
891
892/* __getstate__ returns a fabricated instance dict as in the pure-Python
893 * Element implementation, for interoperability/interchangeability. This
894 * makes the pure-Python implementation details an API, but (a) there aren't
895 * any unnecessary structures there; and (b) it buys compatibility with 3.2
896 * pickles. See issue #16076.
897 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300898/*[clinic input]
899_elementtree.Element.__getstate__
900
901[clinic start generated code]*/
902
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300904_elementtree_Element___getstate___impl(ElementObject *self)
905/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200907 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908 PyObject *instancedict = NULL, *children;
909
910 /* Build a list of children. */
911 children = PyList_New(self->extra ? self->extra->length : 0);
912 if (!children)
913 return NULL;
914 for (i = 0; i < PyList_GET_SIZE(children); i++) {
915 PyObject *child = self->extra->children[i];
916 Py_INCREF(child);
917 PyList_SET_ITEM(children, i, child);
918 }
919
920 /* Construct the state object. */
921 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
922 if (noattrib)
923 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
924 PICKLED_TAG, self->tag,
925 PICKLED_CHILDREN, children,
926 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700927 PICKLED_TEXT, JOIN_OBJ(self->text),
928 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800929 else
930 instancedict = Py_BuildValue("{sOsOsOsOsO}",
931 PICKLED_TAG, self->tag,
932 PICKLED_CHILDREN, children,
933 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700934 PICKLED_TEXT, JOIN_OBJ(self->text),
935 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800936 if (instancedict) {
937 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800939 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 else {
941 for (i = 0; i < PyList_GET_SIZE(children); i++)
942 Py_DECREF(PyList_GET_ITEM(children, i));
943 Py_DECREF(children);
944
945 return NULL;
946 }
947}
948
949static PyObject *
950element_setstate_from_attributes(ElementObject *self,
951 PyObject *tag,
952 PyObject *attrib,
953 PyObject *text,
954 PyObject *tail,
955 PyObject *children)
956{
957 Py_ssize_t i, nchildren;
958
959 if (!tag) {
960 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
961 return NULL;
962 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800963
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200964 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300965 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800966
Eli Benderskydd3661e2013-09-13 06:24:25 -0700967 _clear_joined_ptr(&self->text);
968 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
969 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Eli Benderskydd3661e2013-09-13 06:24:25 -0700971 _clear_joined_ptr(&self->tail);
972 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
973 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974
975 /* Handle ATTRIB and CHILDREN. */
976 if (!children && !attrib)
977 Py_RETURN_NONE;
978
979 /* Compute 'nchildren'. */
980 if (children) {
981 if (!PyList_Check(children)) {
982 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
983 return NULL;
984 }
985 nchildren = PyList_Size(children);
986 }
987 else {
988 nchildren = 0;
989 }
990
991 /* Allocate 'extra'. */
992 if (element_resize(self, nchildren)) {
993 return NULL;
994 }
995 assert(self->extra && self->extra->allocated >= nchildren);
996
997 /* Copy children */
998 for (i = 0; i < nchildren; i++) {
999 self->extra->children[i] = PyList_GET_ITEM(children, i);
1000 Py_INCREF(self->extra->children[i]);
1001 }
1002
1003 self->extra->length = nchildren;
1004 self->extra->allocated = nchildren;
1005
1006 /* Stash attrib. */
1007 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001009 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 }
1011
1012 Py_RETURN_NONE;
1013}
1014
1015/* __setstate__ for Element instance from the Python implementation.
1016 * 'state' should be the instance dict.
1017 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001018
Eli Bendersky698bdb22013-01-10 06:01:06 -08001019static PyObject *
1020element_setstate_from_Python(ElementObject *self, PyObject *state)
1021{
1022 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1023 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1024 PyObject *args;
1025 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001026 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028 tag = attrib = text = tail = children = NULL;
1029 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032
1033 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1034 &attrib, &text, &tail, &children))
1035 retval = element_setstate_from_attributes(self, tag, attrib, text,
1036 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001037 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001038 retval = NULL;
1039
1040 Py_DECREF(args);
1041 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001042}
1043
Serhiy Storchakacb985562015-05-04 15:32:48 +03001044/*[clinic input]
1045_elementtree.Element.__setstate__
1046
1047 state: object
1048 /
1049
1050[clinic start generated code]*/
1051
Eli Bendersky698bdb22013-01-10 06:01:06 -08001052static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001053_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1054/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001055{
1056 if (!PyDict_CheckExact(state)) {
1057 PyErr_Format(PyExc_TypeError,
1058 "Don't know how to unpickle \"%.200R\" as an Element",
1059 state);
1060 return NULL;
1061 }
1062 else
1063 return element_setstate_from_Python(self, state);
1064}
1065
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001066LOCAL(int)
1067checkpath(PyObject* tag)
1068{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001069 Py_ssize_t i;
1070 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071
1072 /* check if a tag contains an xpath character */
1073
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001074#define PATHCHAR(ch) \
1075 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001076
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001078 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1079 void *data = PyUnicode_DATA(tag);
1080 unsigned int kind = PyUnicode_KIND(tag);
1081 for (i = 0; i < len; i++) {
1082 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1083 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 return 1;
1089 }
1090 return 0;
1091 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001092 if (PyBytes_Check(tag)) {
1093 char *p = PyBytes_AS_STRING(tag);
1094 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001095 if (p[i] == '{')
1096 check = 0;
1097 else if (p[i] == '}')
1098 check = 1;
1099 else if (check && PATHCHAR(p[i]))
1100 return 1;
1101 }
1102 return 0;
1103 }
1104
1105 return 1; /* unknown type; might be path expression */
1106}
1107
Serhiy Storchakacb985562015-05-04 15:32:48 +03001108/*[clinic input]
1109_elementtree.Element.extend
1110
1111 elements: object
1112 /
1113
1114[clinic start generated code]*/
1115
1116static PyObject *
1117_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1118/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119{
1120 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001121 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122
Serhiy Storchakacb985562015-05-04 15:32:48 +03001123 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 if (!seq) {
1125 PyErr_Format(
1126 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001127 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 );
1129 return NULL;
1130 }
1131
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001132 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001133 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 Py_INCREF(element);
1135 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001136 PyErr_Format(
1137 PyExc_TypeError,
1138 "expected an Element, not \"%.200s\"",
1139 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001140 Py_DECREF(seq);
1141 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001142 return NULL;
1143 }
1144
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001145 if (element_add_subelement(self, element) < 0) {
1146 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001147 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001148 return NULL;
1149 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001150 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 }
1152
1153 Py_DECREF(seq);
1154
1155 Py_RETURN_NONE;
1156}
1157
Serhiy Storchakacb985562015-05-04 15:32:48 +03001158/*[clinic input]
1159_elementtree.Element.find
1160
1161 path: object
1162 namespaces: object = None
1163
1164[clinic start generated code]*/
1165
1166static PyObject *
1167_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1168 PyObject *namespaces)
1169/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001171 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001172 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001173
Serhiy Storchakacb985562015-05-04 15:32:48 +03001174 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001175 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001176 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001179 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180
1181 if (!self->extra)
1182 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184 for (i = 0; i < self->extra->length; i++) {
1185 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001186 int rc;
1187 if (!Element_CheckExact(item))
1188 continue;
1189 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001190 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001192 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 Py_DECREF(item);
1194 if (rc < 0)
1195 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 }
1197
1198 Py_RETURN_NONE;
1199}
1200
Serhiy Storchakacb985562015-05-04 15:32:48 +03001201/*[clinic input]
1202_elementtree.Element.findtext
1203
1204 path: object
1205 default: object = None
1206 namespaces: object = None
1207
1208[clinic start generated code]*/
1209
1210static PyObject *
1211_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1212 PyObject *default_value,
1213 PyObject *namespaces)
1214/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001215{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001216 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001217 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001218 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001219
Serhiy Storchakacb985562015-05-04 15:32:48 +03001220 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001223 );
1224
1225 if (!self->extra) {
1226 Py_INCREF(default_value);
1227 return default_value;
1228 }
1229
1230 for (i = 0; i < self->extra->length; i++) {
1231 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001232 int rc;
1233 if (!Element_CheckExact(item))
1234 continue;
1235 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001236 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001237 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001239 if (text == Py_None) {
1240 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001241 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001242 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001243 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001244 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001245 return text;
1246 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001247 Py_DECREF(item);
1248 if (rc < 0)
1249 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 }
1251
1252 Py_INCREF(default_value);
1253 return default_value;
1254}
1255
Serhiy Storchakacb985562015-05-04 15:32:48 +03001256/*[clinic input]
1257_elementtree.Element.findall
1258
1259 path: object
1260 namespaces: object = None
1261
1262[clinic start generated code]*/
1263
1264static PyObject *
1265_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1266 PyObject *namespaces)
1267/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001268{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001269 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001270 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001271 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001272 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001273
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001274 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001275 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001276 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001277 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001278 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001279 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280
1281 out = PyList_New(0);
1282 if (!out)
1283 return NULL;
1284
1285 if (!self->extra)
1286 return out;
1287
1288 for (i = 0; i < self->extra->length; i++) {
1289 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001290 int rc;
1291 if (!Element_CheckExact(item))
1292 continue;
1293 Py_INCREF(item);
1294 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1295 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1296 Py_DECREF(item);
1297 Py_DECREF(out);
1298 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001299 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001300 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301 }
1302
1303 return out;
1304}
1305
Serhiy Storchakacb985562015-05-04 15:32:48 +03001306/*[clinic input]
1307_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001308
Serhiy Storchakacb985562015-05-04 15:32:48 +03001309 path: object
1310 namespaces: object = None
1311
1312[clinic start generated code]*/
1313
1314static PyObject *
1315_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1316 PyObject *namespaces)
1317/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1318{
1319 PyObject* tag = path;
1320 _Py_IDENTIFIER(iterfind);
1321 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001322
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001323 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001324 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001325}
1326
Serhiy Storchakacb985562015-05-04 15:32:48 +03001327/*[clinic input]
1328_elementtree.Element.get
1329
1330 key: object
1331 default: object = None
1332
1333[clinic start generated code]*/
1334
1335static PyObject *
1336_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1337 PyObject *default_value)
1338/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001339{
1340 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341
1342 if (!self->extra || self->extra->attrib == Py_None)
1343 value = default_value;
1344 else {
1345 value = PyDict_GetItem(self->extra->attrib, key);
1346 if (!value)
1347 value = default_value;
1348 }
1349
1350 Py_INCREF(value);
1351 return value;
1352}
1353
Serhiy Storchakacb985562015-05-04 15:32:48 +03001354/*[clinic input]
1355_elementtree.Element.getchildren
1356
1357[clinic start generated code]*/
1358
1359static PyObject *
1360_elementtree_Element_getchildren_impl(ElementObject *self)
1361/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001362{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001363 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001364 PyObject* list;
1365
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001366 /* FIXME: report as deprecated? */
1367
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001368 if (!self->extra)
1369 return PyList_New(0);
1370
1371 list = PyList_New(self->extra->length);
1372 if (!list)
1373 return NULL;
1374
1375 for (i = 0; i < self->extra->length; i++) {
1376 PyObject* item = self->extra->children[i];
1377 Py_INCREF(item);
1378 PyList_SET_ITEM(list, i, item);
1379 }
1380
1381 return list;
1382}
1383
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001384
/* Forward declaration: used by Element.iter() (gettext == 0) and
   Element.itertext() (gettext == 1) below.  The definition is not part of
   this section of the file. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1387
1388
Serhiy Storchakacb985562015-05-04 15:32:48 +03001389/*[clinic input]
1390_elementtree.Element.iter
1391
1392 tag: object = None
1393
1394[clinic start generated code]*/
1395
Eli Bendersky64d11e62012-06-15 07:42:50 +03001396static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001397_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1398/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001399{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001400 if (PyUnicode_Check(tag)) {
1401 if (PyUnicode_READY(tag) < 0)
1402 return NULL;
1403 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1404 tag = Py_None;
1405 }
1406 else if (PyBytes_Check(tag)) {
1407 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1408 tag = Py_None;
1409 }
1410
Eli Bendersky64d11e62012-06-15 07:42:50 +03001411 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001412}
1413
1414
Serhiy Storchakacb985562015-05-04 15:32:48 +03001415/*[clinic input]
1416_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001417
Serhiy Storchakacb985562015-05-04 15:32:48 +03001418[clinic start generated code]*/
1419
1420static PyObject *
1421_elementtree_Element_itertext_impl(ElementObject *self)
1422/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1423{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001424 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001425}
1426
Eli Bendersky64d11e62012-06-15 07:42:50 +03001427
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001428static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001429element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001431 ElementObject* self = (ElementObject*) self_;
1432
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001433 if (!self->extra || index < 0 || index >= self->extra->length) {
1434 PyErr_SetString(
1435 PyExc_IndexError,
1436 "child index out of range"
1437 );
1438 return NULL;
1439 }
1440
1441 Py_INCREF(self->extra->children[index]);
1442 return self->extra->children[index];
1443}
1444
Serhiy Storchakacb985562015-05-04 15:32:48 +03001445/*[clinic input]
1446_elementtree.Element.insert
1447
1448 index: Py_ssize_t
1449 subelement: object(subclass_of='&Element_Type')
1450 /
1451
1452[clinic start generated code]*/
1453
1454static PyObject *
1455_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1456 PyObject *subelement)
1457/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001459 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460
Victor Stinner5f0af232013-07-11 23:01:36 +02001461 if (!self->extra) {
1462 if (create_extra(self, NULL) < 0)
1463 return NULL;
1464 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001465
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001466 if (index < 0) {
1467 index += self->extra->length;
1468 if (index < 0)
1469 index = 0;
1470 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001471 if (index > self->extra->length)
1472 index = self->extra->length;
1473
1474 if (element_resize(self, 1) < 0)
1475 return NULL;
1476
1477 for (i = self->extra->length; i > index; i--)
1478 self->extra->children[i] = self->extra->children[i-1];
1479
Serhiy Storchakacb985562015-05-04 15:32:48 +03001480 Py_INCREF(subelement);
1481 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001482
1483 self->extra->length++;
1484
1485 Py_RETURN_NONE;
1486}
1487
Serhiy Storchakacb985562015-05-04 15:32:48 +03001488/*[clinic input]
1489_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001490
Serhiy Storchakacb985562015-05-04 15:32:48 +03001491[clinic start generated code]*/
1492
1493static PyObject *
1494_elementtree_Element_items_impl(ElementObject *self)
1495/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1496{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001497 if (!self->extra || self->extra->attrib == Py_None)
1498 return PyList_New(0);
1499
1500 return PyDict_Items(self->extra->attrib);
1501}
1502
Serhiy Storchakacb985562015-05-04 15:32:48 +03001503/*[clinic input]
1504_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001505
Serhiy Storchakacb985562015-05-04 15:32:48 +03001506[clinic start generated code]*/
1507
1508static PyObject *
1509_elementtree_Element_keys_impl(ElementObject *self)
1510/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1511{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001512 if (!self->extra || self->extra->attrib == Py_None)
1513 return PyList_New(0);
1514
1515 return PyDict_Keys(self->extra->attrib);
1516}
1517
Martin v. Löwis18e16552006-02-15 17:27:45 +00001518static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519element_length(ElementObject* self)
1520{
1521 if (!self->extra)
1522 return 0;
1523
1524 return self->extra->length;
1525}
1526
Serhiy Storchakacb985562015-05-04 15:32:48 +03001527/*[clinic input]
1528_elementtree.Element.makeelement
1529
1530 tag: object
1531 attrib: object
1532 /
1533
1534[clinic start generated code]*/
1535
1536static PyObject *
1537_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1538 PyObject *attrib)
1539/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001540{
1541 PyObject* elem;
1542
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543 attrib = PyDict_Copy(attrib);
1544 if (!attrib)
1545 return NULL;
1546
Eli Bendersky092af1f2012-03-04 07:14:03 +02001547 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001548
1549 Py_DECREF(attrib);
1550
1551 return elem;
1552}
1553
Serhiy Storchakacb985562015-05-04 15:32:48 +03001554/*[clinic input]
1555_elementtree.Element.remove
1556
1557 subelement: object(subclass_of='&Element_Type')
1558 /
1559
1560[clinic start generated code]*/
1561
1562static PyObject *
1563_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1564/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001565{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001566 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001567 int rc;
1568 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570 if (!self->extra) {
1571 /* element has no children, so raise exception */
1572 PyErr_SetString(
1573 PyExc_ValueError,
1574 "list.remove(x): x not in list"
1575 );
1576 return NULL;
1577 }
1578
1579 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001580 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001581 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001582 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001583 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001584 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001585 if (rc < 0)
1586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001587 }
1588
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001589 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001590 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591 PyErr_SetString(
1592 PyExc_ValueError,
1593 "list.remove(x): x not in list"
1594 );
1595 return NULL;
1596 }
1597
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001598 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001599
1600 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601 for (; i < self->extra->length; i++)
1602 self->extra->children[i] = self->extra->children[i+1];
1603
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001604 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605 Py_RETURN_NONE;
1606}
1607
1608static PyObject*
1609element_repr(ElementObject* self)
1610{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001611 int status;
1612
1613 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001614 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001615
1616 status = Py_ReprEnter((PyObject *)self);
1617 if (status == 0) {
1618 PyObject *res;
1619 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1620 Py_ReprLeave((PyObject *)self);
1621 return res;
1622 }
1623 if (status > 0)
1624 PyErr_Format(PyExc_RuntimeError,
1625 "reentrant call inside %s.__repr__",
1626 Py_TYPE(self)->tp_name);
1627 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001628}
1629
Serhiy Storchakacb985562015-05-04 15:32:48 +03001630/*[clinic input]
1631_elementtree.Element.set
1632
1633 key: object
1634 value: object
1635 /
1636
1637[clinic start generated code]*/
1638
1639static PyObject *
1640_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1641 PyObject *value)
1642/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001643{
1644 PyObject* attrib;
1645
Victor Stinner5f0af232013-07-11 23:01:36 +02001646 if (!self->extra) {
1647 if (create_extra(self, NULL) < 0)
1648 return NULL;
1649 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001650
1651 attrib = element_get_attrib(self);
1652 if (!attrib)
1653 return NULL;
1654
1655 if (PyDict_SetItem(attrib, key, value) < 0)
1656 return NULL;
1657
1658 Py_RETURN_NONE;
1659}
1660
1661static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001662element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001663{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001664 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001665 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001666 PyObject* old;
1667
1668 if (!self->extra || index < 0 || index >= self->extra->length) {
1669 PyErr_SetString(
1670 PyExc_IndexError,
1671 "child assignment index out of range");
1672 return -1;
1673 }
1674
1675 old = self->extra->children[index];
1676
1677 if (item) {
1678 Py_INCREF(item);
1679 self->extra->children[index] = item;
1680 } else {
1681 self->extra->length--;
1682 for (i = index; i < self->extra->length; i++)
1683 self->extra->children[i] = self->extra->children[i+1];
1684 }
1685
1686 Py_DECREF(old);
1687
1688 return 0;
1689}
1690
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001691static PyObject*
1692element_subscr(PyObject* self_, PyObject* item)
1693{
1694 ElementObject* self = (ElementObject*) self_;
1695
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001696 if (PyIndex_Check(item)) {
1697 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001698
1699 if (i == -1 && PyErr_Occurred()) {
1700 return NULL;
1701 }
1702 if (i < 0 && self->extra)
1703 i += self->extra->length;
1704 return element_getitem(self_, i);
1705 }
1706 else if (PySlice_Check(item)) {
1707 Py_ssize_t start, stop, step, slicelen, cur, i;
1708 PyObject* list;
1709
1710 if (!self->extra)
1711 return PyList_New(0);
1712
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001713 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001714 self->extra->length,
1715 &start, &stop, &step, &slicelen) < 0) {
1716 return NULL;
1717 }
1718
1719 if (slicelen <= 0)
1720 return PyList_New(0);
1721 else {
1722 list = PyList_New(slicelen);
1723 if (!list)
1724 return NULL;
1725
1726 for (cur = start, i = 0; i < slicelen;
1727 cur += step, i++) {
1728 PyObject* item = self->extra->children[cur];
1729 Py_INCREF(item);
1730 PyList_SET_ITEM(list, i, item);
1731 }
1732
1733 return list;
1734 }
1735 }
1736 else {
1737 PyErr_SetString(PyExc_TypeError,
1738 "element indices must be integers");
1739 return NULL;
1740 }
1741}
1742
1743static int
1744element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1745{
1746 ElementObject* self = (ElementObject*) self_;
1747
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001748 if (PyIndex_Check(item)) {
1749 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001750
1751 if (i == -1 && PyErr_Occurred()) {
1752 return -1;
1753 }
1754 if (i < 0 && self->extra)
1755 i += self->extra->length;
1756 return element_setitem(self_, i, value);
1757 }
1758 else if (PySlice_Check(item)) {
1759 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1760
1761 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001762 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001763
Victor Stinner5f0af232013-07-11 23:01:36 +02001764 if (!self->extra) {
1765 if (create_extra(self, NULL) < 0)
1766 return -1;
1767 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001768
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001769 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001770 self->extra->length,
1771 &start, &stop, &step, &slicelen) < 0) {
1772 return -1;
1773 }
1774
Eli Bendersky865756a2012-03-09 13:38:15 +02001775 if (value == NULL) {
1776 /* Delete slice */
1777 size_t cur;
1778 Py_ssize_t i;
1779
1780 if (slicelen <= 0)
1781 return 0;
1782
1783 /* Since we're deleting, the direction of the range doesn't matter,
1784 * so for simplicity make it always ascending.
1785 */
1786 if (step < 0) {
1787 stop = start + 1;
1788 start = stop + step * (slicelen - 1) - 1;
1789 step = -step;
1790 }
1791
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001792 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001793
1794 /* recycle is a list that will contain all the children
1795 * scheduled for removal.
1796 */
1797 if (!(recycle = PyList_New(slicelen))) {
1798 PyErr_NoMemory();
1799 return -1;
1800 }
1801
1802 /* This loop walks over all the children that have to be deleted,
1803 * with cur pointing at them. num_moved is the amount of children
1804 * until the next deleted child that have to be "shifted down" to
1805 * occupy the deleted's places.
1806 * Note that in the ith iteration, shifting is done i+i places down
1807 * because i children were already removed.
1808 */
1809 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1810 /* Compute how many children have to be moved, clipping at the
1811 * list end.
1812 */
1813 Py_ssize_t num_moved = step - 1;
1814 if (cur + step >= (size_t)self->extra->length) {
1815 num_moved = self->extra->length - cur - 1;
1816 }
1817
1818 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1819
1820 memmove(
1821 self->extra->children + cur - i,
1822 self->extra->children + cur + 1,
1823 num_moved * sizeof(PyObject *));
1824 }
1825
1826 /* Leftover "tail" after the last removed child */
1827 cur = start + (size_t)slicelen * step;
1828 if (cur < (size_t)self->extra->length) {
1829 memmove(
1830 self->extra->children + cur - slicelen,
1831 self->extra->children + cur,
1832 (self->extra->length - cur) * sizeof(PyObject *));
1833 }
1834
1835 self->extra->length -= slicelen;
1836
1837 /* Discard the recycle list with all the deleted sub-elements */
1838 Py_XDECREF(recycle);
1839 return 0;
1840 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001841
1842 /* A new slice is actually being assigned */
1843 seq = PySequence_Fast(value, "");
1844 if (!seq) {
1845 PyErr_Format(
1846 PyExc_TypeError,
1847 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1848 );
1849 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001850 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001851 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001852
1853 if (step != 1 && newlen != slicelen)
1854 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001855 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001856 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001857 "attempt to assign sequence of size %zd "
1858 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 newlen, slicelen
1860 );
1861 return -1;
1862 }
1863
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001864 /* Resize before creating the recycle bin, to prevent refleaks. */
1865 if (newlen > slicelen) {
1866 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001867 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001868 return -1;
1869 }
1870 }
1871
1872 if (slicelen > 0) {
1873 /* to avoid recursive calls to this method (via decref), move
1874 old items to the recycle bin here, and get rid of them when
1875 we're done modifying the element */
1876 recycle = PyList_New(slicelen);
1877 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001878 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001879 return -1;
1880 }
1881 for (cur = start, i = 0; i < slicelen;
1882 cur += step, i++)
1883 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1884 }
1885
1886 if (newlen < slicelen) {
1887 /* delete slice */
1888 for (i = stop; i < self->extra->length; i++)
1889 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1890 } else if (newlen > slicelen) {
1891 /* insert slice */
1892 for (i = self->extra->length-1; i >= stop; i--)
1893 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1894 }
1895
1896 /* replace the slice */
1897 for (cur = start, i = 0; i < newlen;
1898 cur += step, i++) {
1899 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1900 Py_INCREF(element);
1901 self->extra->children[cur] = element;
1902 }
1903
1904 self->extra->length += newlen - slicelen;
1905
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001906 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001907
1908 /* discard the recycle bin, and everything in it */
1909 Py_XDECREF(recycle);
1910
1911 return 0;
1912 }
1913 else {
1914 PyErr_SetString(PyExc_TypeError,
1915 "element indices must be integers");
1916 return -1;
1917 }
1918}
1919
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001920static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001921element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001922{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001923 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001924 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001925 return res;
1926}
1927
Serhiy Storchakadde08152015-11-25 15:28:13 +02001928static PyObject*
1929element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001930{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001931 PyObject *res = element_get_text(self);
1932 Py_XINCREF(res);
1933 return res;
1934}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001935
Serhiy Storchakadde08152015-11-25 15:28:13 +02001936static PyObject*
1937element_tail_getter(ElementObject *self, void *closure)
1938{
1939 PyObject *res = element_get_tail(self);
1940 Py_XINCREF(res);
1941 return res;
1942}
1943
1944static PyObject*
1945element_attrib_getter(ElementObject *self, void *closure)
1946{
1947 PyObject *res;
1948 if (!self->extra) {
1949 if (create_extra(self, NULL) < 0)
1950 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001951 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001952 res = element_get_attrib(self);
1953 Py_XINCREF(res);
1954 return res;
1955}
Victor Stinner4d463432013-07-11 23:05:03 +02001956
/* Macro for setter validation.  When V is NULL it raises AttributeError
   ("can't delete element attribute") and makes the enclosing function
   return -1; otherwise it does nothing.
   The body is wrapped in do { } while (0) so that the usual
   "_VALIDATE_ATTR_VALUE(v);" spelling is exactly one statement and stays
   safe inside an unbraced if/else. */
#define _VALIDATE_ATTR_VALUE(V)                         \
    do {                                                \
        if ((V) == NULL) {                              \
            PyErr_SetString(                            \
                PyExc_AttributeError,                   \
                "can't delete element attribute");      \
            return -1;                                  \
        }                                               \
    } while (0)
1965
Serhiy Storchakadde08152015-11-25 15:28:13 +02001966static int
1967element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1968{
1969 _VALIDATE_ATTR_VALUE(value);
1970 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001971 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001972 return 0;
1973}
1974
1975static int
1976element_text_setter(ElementObject *self, PyObject *value, void *closure)
1977{
1978 _VALIDATE_ATTR_VALUE(value);
1979 Py_INCREF(value);
1980 Py_DECREF(JOIN_OBJ(self->text));
1981 self->text = value;
1982 return 0;
1983}
1984
1985static int
1986element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1987{
1988 _VALIDATE_ATTR_VALUE(value);
1989 Py_INCREF(value);
1990 Py_DECREF(JOIN_OBJ(self->tail));
1991 self->tail = value;
1992 return 0;
1993}
1994
1995static int
1996element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1997{
1998 _VALIDATE_ATTR_VALUE(value);
1999 if (!self->extra) {
2000 if (create_extra(self, NULL) < 0)
2001 return -1;
2002 }
2003 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002004 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002005 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002006}
2007
/* Sequence protocol slots for Element: length, integer indexing and
   integer item assignment/deletion.  Slice handling lives in the mapping
   functions (element_subscr / element_ass_subscr). */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,           /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                    /* sq_item */
    0,                                  /* was_sq_slice (unused in Python 3) */
    element_setitem,                    /* sq_ass_item */
    0,                                  /* was_sq_ass_slice (unused in Python 3) */
};
2017
Eli Bendersky64d11e62012-06-15 07:42:50 +03002018/******************************* Element iterator ****************************/
2019
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked */
    Py_ssize_t child_index;     /* index of the next child of parent to visit */
} ParentLocator;
2032
/* Iterator object shared by the element and text iterations (see the
   gettext flag). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of stack entries */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* starting element; set to NULL once
                                       elementiter_next has consumed it */
    PyObject *sought_tag;           /* tag to match; Py_None matches every
                                       element */
    int gettext;                    /* non-zero: yield text instead of
                                       elements */
} ElementIterObject;
2042
2043
2044static void
2045elementiter_dealloc(ElementIterObject *it)
2046{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002047 Py_ssize_t i = it->parent_stack_used;
2048 it->parent_stack_used = 0;
2049 while (i--)
2050 Py_XDECREF(it->parent_stack[i].parent);
2051 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002052
2053 Py_XDECREF(it->sought_tag);
2054 Py_XDECREF(it->root_element);
2055
2056 PyObject_GC_UnTrack(it);
2057 PyObject_GC_Del(it);
2058}
2059
2060static int
2061elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2062{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002063 Py_ssize_t i = it->parent_stack_used;
2064 while (i--)
2065 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002066
2067 Py_VISIT(it->root_element);
2068 Py_VISIT(it->sought_tag);
2069 return 0;
2070}
2071
2072/* Helper function for elementiter_next. Add a new parent to the parent stack.
2073 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002074static int
2075parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002076{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002077 ParentLocator *item;
2078
2079 if (it->parent_stack_used >= it->parent_stack_size) {
2080 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2081 ParentLocator *parent_stack = it->parent_stack;
2082 PyMem_Resize(parent_stack, ParentLocator, new_size);
2083 if (parent_stack == NULL)
2084 return -1;
2085 it->parent_stack = parent_stack;
2086 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002087 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002088 item = it->parent_stack + it->parent_stack_used++;
2089 Py_INCREF(parent);
2090 item->parent = parent;
2091 item->child_index = 0;
2092 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002093}
2094
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     *
     * Returns a new reference to the next element (or text object), or NULL
     * with an exception set; StopIteration signals normal exhaustion.
     *
     * Reference bookkeeping: throughout the loop body this function holds
     * exactly one reference to elem, which is either returned to the caller
     * or released before the next iteration.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: the reference the stack held on elem is now ours. */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* Descend: the stack takes its own reference to elem. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* Element iteration: a sought tag of None matches everything. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* Text iteration: text was obtained from elem without taking a
         * reference.  NULL is treated as an error; None and values that are
         * false are skipped. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Take our own reference to text before letting go of elem. */
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    /* Not reached: the loop above only exits through return. */
    return NULL;
}
2199
2200
/* Type object for the element iterator.  It is a GC-enabled type that is
   its own iterator (tp_iter returns self); tp_new is not set and instances
   are built by create_elementiter(). */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2244
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002245#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002246
2247static PyObject *
2248create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2249{
2250 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002251
2252 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2253 if (!it)
2254 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002255
Victor Stinner4d463432013-07-11 23:05:03 +02002256 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002258 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002259 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002260 it->root_element = self;
2261
Eli Bendersky64d11e62012-06-15 07:42:50 +03002262 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002263
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002264 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002265 if (it->parent_stack == NULL) {
2266 Py_DECREF(it);
2267 PyErr_NoMemory();
2268 return NULL;
2269 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002270 it->parent_stack_used = 0;
2271 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002272
Eli Bendersky64d11e62012-06-15 07:42:50 +03002273 return (PyObject *)it;
2274}
2275
2276
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002277/* ==================================================================== */
2278/* the tree builder type */
2279
/* State of a TreeBuilder: the tree under construction, the current
   position in it, pending character data and optional event reporting. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* callable used to create nodes; when NULL
                                  or None the built-in element is used */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;

/* True when op is exactly a TreeBuilder (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002304
2305/* -------------------------------------------------------------------- */
2306/* constructor and destructor */
2307
Eli Bendersky58d548d2012-05-29 15:45:16 +03002308static PyObject *
2309treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002311 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2312 if (t != NULL) {
2313 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002314
Eli Bendersky58d548d2012-05-29 15:45:16 +03002315 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002316 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002317 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002318 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002319
Eli Bendersky58d548d2012-05-29 15:45:16 +03002320 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002321 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 t->stack = PyList_New(20);
2323 if (!t->stack) {
2324 Py_DECREF(t->this);
2325 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002326 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002327 return NULL;
2328 }
2329 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002330
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002331 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002332 t->start_event_obj = t->end_event_obj = NULL;
2333 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2334 }
2335 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002336}
2337
Serhiy Storchakacb985562015-05-04 15:32:48 +03002338/*[clinic input]
2339_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002340
Serhiy Storchakacb985562015-05-04 15:32:48 +03002341 element_factory: object = NULL
2342
2343[clinic start generated code]*/
2344
2345static int
2346_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2347 PyObject *element_factory)
2348/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2349{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002350 if (element_factory) {
2351 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002352 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002353 }
2354
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356}
2357
Eli Bendersky48d358b2012-05-30 17:57:50 +03002358static int
2359treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2360{
2361 Py_VISIT(self->root);
2362 Py_VISIT(self->this);
2363 Py_VISIT(self->last);
2364 Py_VISIT(self->data);
2365 Py_VISIT(self->stack);
2366 Py_VISIT(self->element_factory);
2367 return 0;
2368}
2369
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* tp_clear for TreeBuilder (also used by treebuilder_dealloc): drops
       every object reference the builder owns.  Py_CLEAR sets each member
       to NULL before releasing it, so the function is safe to call on a
       partially cleared object.  Always returns 0. */
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386
Eli Bendersky48d358b2012-05-30 17:57:50 +03002387static void
2388treebuilder_dealloc(TreeBuilderObject *self)
2389{
2390 PyObject_GC_UnTrack(self);
2391 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002392 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393}
2394
2395/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002396/* helpers for handling of arbitrary element-like objects */
2397
2398static int
2399treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2400 PyObject **dest, _Py_Identifier *name)
2401{
2402 if (Element_CheckExact(element)) {
2403 Py_DECREF(JOIN_OBJ(*dest));
2404 *dest = JOIN_SET(data, PyList_CheckExact(data));
2405 return 0;
2406 }
2407 else {
2408 PyObject *joined = list_join(data);
2409 int r;
2410 if (joined == NULL)
2411 return -1;
2412 r = _PyObject_SetAttrId(element, name, joined);
2413 Py_DECREF(joined);
2414 return r;
2415 }
2416}
2417
2418/* These two functions steal a reference to data */
2419static int
2420treebuilder_set_element_text(PyObject *element, PyObject *data)
2421{
2422 _Py_IDENTIFIER(text);
2423 return treebuilder_set_element_text_or_tail(
2424 element, data, &((ElementObject *) element)->text, &PyId_text);
2425}
2426
2427static int
2428treebuilder_set_element_tail(PyObject *element, PyObject *data)
2429{
2430 _Py_IDENTIFIER(tail);
2431 return treebuilder_set_element_text_or_tail(
2432 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2433}
2434
2435static int
2436treebuilder_add_subelement(PyObject *element, PyObject *child)
2437{
2438 _Py_IDENTIFIER(append);
2439 if (Element_CheckExact(element)) {
2440 ElementObject *elem = (ElementObject *) element;
2441 return element_add_subelement(elem, child);
2442 }
2443 else {
2444 PyObject *res;
2445 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2446 if (res == NULL)
2447 return -1;
2448 Py_DECREF(res);
2449 return 0;
2450 }
2451}
2452
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002453LOCAL(int)
2454treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2455 PyObject *node)
2456{
2457 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002458 PyObject *res;
2459 PyObject *event = PyTuple_Pack(2, action, node);
2460 if (event == NULL)
2461 return -1;
2462 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2463 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002464 if (res == NULL)
2465 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002466 Py_DECREF(res);
2467 }
2468 return 0;
2469}
2470
Antoine Pitrouee329312012-10-04 19:53:29 +02002471/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472/* handlers */
2473
2474LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2476 PyObject* attrib)
2477{
2478 PyObject* node;
2479 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002480 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481
2482 if (self->data) {
2483 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002484 if (treebuilder_set_element_text(self->last, self->data))
2485 return NULL;
2486 }
2487 else {
2488 if (treebuilder_set_element_tail(self->last, self->data))
2489 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 }
2491 self->data = NULL;
2492 }
2493
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002494 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002495 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002496 } else if (attrib == Py_None) {
2497 attrib = PyDict_New();
2498 if (!attrib)
2499 return NULL;
2500 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2501 Py_DECREF(attrib);
2502 }
2503 else {
2504 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002505 }
2506 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002508 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509
Antoine Pitrouee329312012-10-04 19:53:29 +02002510 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511
2512 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002513 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002514 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515 } else {
2516 if (self->root) {
2517 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002518 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 "multiple elements on top level"
2520 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002521 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 }
2523 Py_INCREF(node);
2524 self->root = node;
2525 }
2526
2527 if (self->index < PyList_GET_SIZE(self->stack)) {
2528 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002529 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 Py_INCREF(this);
2531 } else {
2532 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002533 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 }
2535 self->index++;
2536
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002538 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002540 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002542 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2543 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544
2545 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002546
2547 error:
2548 Py_DECREF(node);
2549 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550}
2551
2552LOCAL(PyObject*)
2553treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2554{
2555 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002556 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002557 /* ignore calls to data before the first call to start */
2558 Py_RETURN_NONE;
2559 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 /* store the first item as is */
2561 Py_INCREF(data); self->data = data;
2562 } else {
2563 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002564 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2565 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002566 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567 /* expat often generates single character data sections; handle
2568 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002569 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2570 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002572 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 } else if (PyList_CheckExact(self->data)) {
2574 if (PyList_Append(self->data, data) < 0)
2575 return NULL;
2576 } else {
2577 PyObject* list = PyList_New(2);
2578 if (!list)
2579 return NULL;
2580 PyList_SET_ITEM(list, 0, self->data);
2581 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2582 self->data = list;
2583 }
2584 }
2585
2586 Py_RETURN_NONE;
2587}
2588
2589LOCAL(PyObject*)
2590treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2591{
2592 PyObject* item;
2593
2594 if (self->data) {
2595 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002596 if (treebuilder_set_element_text(self->last, self->data))
2597 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002599 if (treebuilder_set_element_tail(self->last, self->data))
2600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 }
2602 self->data = NULL;
2603 }
2604
2605 if (self->index == 0) {
2606 PyErr_SetString(
2607 PyExc_IndexError,
2608 "pop from empty stack"
2609 );
2610 return NULL;
2611 }
2612
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002613 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002614 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002615 self->index--;
2616 self->this = PyList_GET_ITEM(self->stack, self->index);
2617 Py_INCREF(self->this);
2618 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002619
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002620 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2621 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622
2623 Py_INCREF(self->last);
2624 return (PyObject*) self->last;
2625}
2626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627/* -------------------------------------------------------------------- */
2628/* methods (in alphabetical order) */
2629
Serhiy Storchakacb985562015-05-04 15:32:48 +03002630/*[clinic input]
2631_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632
Serhiy Storchakacb985562015-05-04 15:32:48 +03002633 data: object
2634 /
2635
2636[clinic start generated code]*/
2637
2638static PyObject *
2639_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2640/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2641{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642 return treebuilder_handle_data(self, data);
2643}
2644
Serhiy Storchakacb985562015-05-04 15:32:48 +03002645/*[clinic input]
2646_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002647
Serhiy Storchakacb985562015-05-04 15:32:48 +03002648 tag: object
2649 /
2650
2651[clinic start generated code]*/
2652
2653static PyObject *
2654_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2655/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2656{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002657 return treebuilder_handle_end(self, tag);
2658}
2659
2660LOCAL(PyObject*)
2661treebuilder_done(TreeBuilderObject* self)
2662{
2663 PyObject* res;
2664
2665 /* FIXME: check stack size? */
2666
2667 if (self->root)
2668 res = self->root;
2669 else
2670 res = Py_None;
2671
2672 Py_INCREF(res);
2673 return res;
2674}
2675
Serhiy Storchakacb985562015-05-04 15:32:48 +03002676/*[clinic input]
2677_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678
Serhiy Storchakacb985562015-05-04 15:32:48 +03002679[clinic start generated code]*/
2680
2681static PyObject *
2682_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2683/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2684{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685 return treebuilder_done(self);
2686}
2687
Serhiy Storchakacb985562015-05-04 15:32:48 +03002688/*[clinic input]
2689_elementtree.TreeBuilder.start
2690
2691 tag: object
2692 attrs: object = None
2693 /
2694
2695[clinic start generated code]*/
2696
2697static PyObject *
2698_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2699 PyObject *attrs)
2700/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002702 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703}
2704
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705/* ==================================================================== */
2706/* the expat interface */
2707
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002710
2711/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2712 * cached globally without being in per-module state.
2713 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002714static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Eli Bendersky52467b12012-06-01 07:13:08 +03002717static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2718 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2719
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720typedef struct {
2721 PyObject_HEAD
2722
2723 XML_Parser parser;
2724
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002725 PyObject *target;
2726 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002728 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002730 PyObject *handle_start;
2731 PyObject *handle_data;
2732 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002734 PyObject *handle_comment;
2735 PyObject *handle_pi;
2736 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002738 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002739
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740} XMLParserObject;
2741
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002742static PyObject*
2743_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2744static PyObject *
2745_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2746 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748/* helpers */
2749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750LOCAL(PyObject*)
2751makeuniversal(XMLParserObject* self, const char* string)
2752{
2753 /* convert a UTF-8 tag/attribute name from the expat parser
2754 to a universal name string */
2755
Antoine Pitrouc1948842012-10-01 23:40:37 +02002756 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757 PyObject* key;
2758 PyObject* value;
2759
2760 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002761 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 if (!key)
2763 return NULL;
2764
2765 value = PyDict_GetItem(self->names, key);
2766
2767 if (value) {
2768 Py_INCREF(value);
2769 } else {
2770 /* new name. convert to universal name, and decode as
2771 necessary */
2772
2773 PyObject* tag;
2774 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002775 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776
2777 /* look for namespace separator */
2778 for (i = 0; i < size; i++)
2779 if (string[i] == '}')
2780 break;
2781 if (i != size) {
2782 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002783 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002784 if (tag == NULL) {
2785 Py_DECREF(key);
2786 return NULL;
2787 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 p[0] = '{';
2790 memcpy(p+1, string, size);
2791 size++;
2792 } else {
2793 /* plain name; use key as tag */
2794 Py_INCREF(key);
2795 tag = key;
2796 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002797
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002798 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002799 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002800 value = PyUnicode_DecodeUTF8(p, size, "strict");
2801 Py_DECREF(tag);
2802 if (!value) {
2803 Py_DECREF(key);
2804 return NULL;
2805 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002806
2807 /* add to names dictionary */
2808 if (PyDict_SetItem(self->names, key, value) < 0) {
2809 Py_DECREF(key);
2810 Py_DECREF(value);
2811 return NULL;
2812 }
2813 }
2814
2815 Py_DECREF(key);
2816 return value;
2817}
2818
Eli Bendersky5b77d812012-03-16 08:20:05 +02002819/* Set the ParseError exception with the given parameters.
2820 * If message is not NULL, it's used as the error string. Otherwise, the
2821 * message string is the default for the given error_code.
2822*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002823static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002824expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2825 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002827 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002828 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002829
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002830 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002831 message ? message : EXPAT(ErrorString)(error_code),
2832 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002833 if (errmsg == NULL)
2834 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002835
Eli Bendersky532d03e2013-08-10 08:00:39 -07002836 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002837 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002838 if (!error)
2839 return;
2840
Eli Bendersky5b77d812012-03-16 08:20:05 +02002841 /* Add code and position attributes */
2842 code = PyLong_FromLong((long)error_code);
2843 if (!code) {
2844 Py_DECREF(error);
2845 return;
2846 }
2847 if (PyObject_SetAttrString(error, "code", code) == -1) {
2848 Py_DECREF(error);
2849 Py_DECREF(code);
2850 return;
2851 }
2852 Py_DECREF(code);
2853
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002854 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002855 if (!position) {
2856 Py_DECREF(error);
2857 return;
2858 }
2859 if (PyObject_SetAttrString(error, "position", position) == -1) {
2860 Py_DECREF(error);
2861 Py_DECREF(position);
2862 return;
2863 }
2864 Py_DECREF(position);
2865
Eli Bendersky532d03e2013-08-10 08:00:39 -07002866 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002867 Py_DECREF(error);
2868}
2869
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002870/* -------------------------------------------------------------------- */
2871/* handlers */
2872
2873static void
2874expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2875 int data_len)
2876{
2877 PyObject* key;
2878 PyObject* value;
2879 PyObject* res;
2880
2881 if (data_len < 2 || data_in[0] != '&')
2882 return;
2883
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002884 if (PyErr_Occurred())
2885 return;
2886
Neal Norwitz0269b912007-08-08 06:56:02 +00002887 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002888 if (!key)
2889 return;
2890
2891 value = PyDict_GetItem(self->entity, key);
2892
2893 if (value) {
2894 if (TreeBuilder_CheckExact(self->target))
2895 res = treebuilder_handle_data(
2896 (TreeBuilderObject*) self->target, value
2897 );
2898 else if (self->handle_data)
2899 res = PyObject_CallFunction(self->handle_data, "O", value);
2900 else
2901 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002902 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002903 } else if (!PyErr_Occurred()) {
2904 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002905 char message[128] = "undefined entity ";
2906 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002907 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002908 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002909 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002910 EXPAT(GetErrorColumnNumber)(self->parser),
2911 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002912 );
2913 }
2914
2915 Py_DECREF(key);
2916}
2917
2918static void
2919expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2920 const XML_Char **attrib_in)
2921{
2922 PyObject* res;
2923 PyObject* tag;
2924 PyObject* attrib;
2925 int ok;
2926
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002927 if (PyErr_Occurred())
2928 return;
2929
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002930 /* tag name */
2931 tag = makeuniversal(self, tag_in);
2932 if (!tag)
2933 return; /* parser will look for errors */
2934
2935 /* attributes */
2936 if (attrib_in[0]) {
2937 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002938 if (!attrib) {
2939 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002941 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002942 while (attrib_in[0] && attrib_in[1]) {
2943 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002944 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 if (!key || !value) {
2946 Py_XDECREF(value);
2947 Py_XDECREF(key);
2948 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002949 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950 return;
2951 }
2952 ok = PyDict_SetItem(attrib, key, value);
2953 Py_DECREF(value);
2954 Py_DECREF(key);
2955 if (ok < 0) {
2956 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002957 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 return;
2959 }
2960 attrib_in += 2;
2961 }
2962 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002963 Py_INCREF(Py_None);
2964 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002965 }
2966
2967 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968 /* shortcut */
2969 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2970 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002971 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002972 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002973 if (attrib == Py_None) {
2974 Py_DECREF(attrib);
2975 attrib = PyDict_New();
2976 if (!attrib) {
2977 Py_DECREF(tag);
2978 return;
2979 }
2980 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002982 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 res = NULL;
2984
2985 Py_DECREF(tag);
2986 Py_DECREF(attrib);
2987
2988 Py_XDECREF(res);
2989}
2990
2991static void
2992expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2993 int data_len)
2994{
2995 PyObject* data;
2996 PyObject* res;
2997
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002998 if (PyErr_Occurred())
2999 return;
3000
Neal Norwitz0269b912007-08-08 06:56:02 +00003001 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003002 if (!data)
3003 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003004
3005 if (TreeBuilder_CheckExact(self->target))
3006 /* shortcut */
3007 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3008 else if (self->handle_data)
3009 res = PyObject_CallFunction(self->handle_data, "O", data);
3010 else
3011 res = NULL;
3012
3013 Py_DECREF(data);
3014
3015 Py_XDECREF(res);
3016}
3017
3018static void
3019expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3020{
3021 PyObject* tag;
3022 PyObject* res = NULL;
3023
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003024 if (PyErr_Occurred())
3025 return;
3026
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003027 if (TreeBuilder_CheckExact(self->target))
3028 /* shortcut */
3029 /* the standard tree builder doesn't look at the end tag */
3030 res = treebuilder_handle_end(
3031 (TreeBuilderObject*) self->target, Py_None
3032 );
3033 else if (self->handle_end) {
3034 tag = makeuniversal(self, tag_in);
3035 if (tag) {
3036 res = PyObject_CallFunction(self->handle_end, "O", tag);
3037 Py_DECREF(tag);
3038 }
3039 }
3040
3041 Py_XDECREF(res);
3042}
3043
3044static void
3045expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3046 const XML_Char *uri)
3047{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003048 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3049 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003050
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003051 if (PyErr_Occurred())
3052 return;
3053
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003054 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003055 return;
3056
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003057 if (!uri)
3058 uri = "";
3059 if (!prefix)
3060 prefix = "";
3061
3062 parcel = Py_BuildValue("ss", prefix, uri);
3063 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003064 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003065 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3066 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003067}
3068
3069static void
3070expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3071{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003072 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3073
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003074 if (PyErr_Occurred())
3075 return;
3076
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003077 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003078 return;
3079
3080 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003081}
3082
3083static void
3084expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3085{
3086 PyObject* comment;
3087 PyObject* res;
3088
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003089 if (PyErr_Occurred())
3090 return;
3091
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003093 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094 if (comment) {
3095 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3096 Py_XDECREF(res);
3097 Py_DECREF(comment);
3098 }
3099 }
3100}
3101
Eli Bendersky45839902013-01-13 05:14:47 -08003102static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003103expat_start_doctype_handler(XMLParserObject *self,
3104 const XML_Char *doctype_name,
3105 const XML_Char *sysid,
3106 const XML_Char *pubid,
3107 int has_internal_subset)
3108{
3109 PyObject *self_pyobj = (PyObject *)self;
3110 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3111 PyObject *parser_doctype = NULL;
3112 PyObject *res = NULL;
3113
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003114 if (PyErr_Occurred())
3115 return;
3116
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003117 doctype_name_obj = makeuniversal(self, doctype_name);
3118 if (!doctype_name_obj)
3119 return;
3120
3121 if (sysid) {
3122 sysid_obj = makeuniversal(self, sysid);
3123 if (!sysid_obj) {
3124 Py_DECREF(doctype_name_obj);
3125 return;
3126 }
3127 } else {
3128 Py_INCREF(Py_None);
3129 sysid_obj = Py_None;
3130 }
3131
3132 if (pubid) {
3133 pubid_obj = makeuniversal(self, pubid);
3134 if (!pubid_obj) {
3135 Py_DECREF(doctype_name_obj);
3136 Py_DECREF(sysid_obj);
3137 return;
3138 }
3139 } else {
3140 Py_INCREF(Py_None);
3141 pubid_obj = Py_None;
3142 }
3143
3144 /* If the target has a handler for doctype, call it. */
3145 if (self->handle_doctype) {
3146 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3147 doctype_name_obj, pubid_obj, sysid_obj);
3148 Py_CLEAR(res);
3149 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003150 else {
3151 /* Now see if the parser itself has a doctype method. If yes and it's
3152 * a custom method, call it but warn about deprecation. If it's only
3153 * the vanilla XMLParser method, do nothing.
3154 */
3155 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3156 if (parser_doctype &&
3157 !(PyCFunction_Check(parser_doctype) &&
3158 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3159 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003160 (PyCFunction) _elementtree_XMLParser_doctype)) {
3161 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3162 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003163 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003164 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003165 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003166 res = PyObject_CallFunction(parser_doctype, "OOO",
3167 doctype_name_obj, pubid_obj, sysid_obj);
3168 Py_CLEAR(res);
3169 }
3170 }
3171
3172clear:
3173 Py_XDECREF(parser_doctype);
3174 Py_DECREF(doctype_name_obj);
3175 Py_DECREF(pubid_obj);
3176 Py_DECREF(sysid_obj);
3177}
3178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179static void
3180expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3181 const XML_Char* data_in)
3182{
3183 PyObject* target;
3184 PyObject* data;
3185 PyObject* res;
3186
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003187 if (PyErr_Occurred())
3188 return;
3189
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003191 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3192 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193 if (target && data) {
3194 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3195 Py_XDECREF(res);
3196 Py_DECREF(data);
3197 Py_DECREF(target);
3198 } else {
3199 Py_XDECREF(data);
3200 Py_XDECREF(target);
3201 }
3202 }
3203}
3204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003206
Eli Bendersky52467b12012-06-01 07:13:08 +03003207static PyObject *
3208xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003209{
Eli Bendersky52467b12012-06-01 07:13:08 +03003210 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3211 if (self) {
3212 self->parser = NULL;
3213 self->target = self->entity = self->names = NULL;
3214 self->handle_start = self->handle_data = self->handle_end = NULL;
3215 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003216 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003218 return (PyObject *)self;
3219}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220
Serhiy Storchakacb985562015-05-04 15:32:48 +03003221/*[clinic input]
3222_elementtree.XMLParser.__init__
3223
3224 html: object = NULL
3225 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003226 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003227
3228[clinic start generated code]*/
3229
Eli Bendersky52467b12012-06-01 07:13:08 +03003230static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003231_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3232 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003233/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003234{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003235 self->entity = PyDict_New();
3236 if (!self->entity)
3237 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238
Serhiy Storchakacb985562015-05-04 15:32:48 +03003239 self->names = PyDict_New();
3240 if (!self->names) {
3241 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003242 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003244
Serhiy Storchakacb985562015-05-04 15:32:48 +03003245 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3246 if (!self->parser) {
3247 Py_CLEAR(self->entity);
3248 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003250 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 }
3252
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 if (target) {
3254 Py_INCREF(target);
3255 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003256 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003258 Py_CLEAR(self->entity);
3259 Py_CLEAR(self->names);
3260 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003261 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003263 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003264 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266 self->handle_start = PyObject_GetAttrString(target, "start");
3267 self->handle_data = PyObject_GetAttrString(target, "data");
3268 self->handle_end = PyObject_GetAttrString(target, "end");
3269 self->handle_comment = PyObject_GetAttrString(target, "comment");
3270 self->handle_pi = PyObject_GetAttrString(target, "pi");
3271 self->handle_close = PyObject_GetAttrString(target, "close");
3272 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273
3274 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003275
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003277 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 (XML_StartElementHandler) expat_start_handler,
3281 (XML_EndElementHandler) expat_end_handler
3282 );
3283 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003284 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285 (XML_DefaultHandler) expat_default_handler
3286 );
3287 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 (XML_CharacterDataHandler) expat_data_handler
3290 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 (XML_CommentHandler) expat_comment_handler
3295 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003298 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 (XML_ProcessingInstructionHandler) expat_pi_handler
3300 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003301 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003303 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3304 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003306 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003307 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003308 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309
Eli Bendersky52467b12012-06-01 07:13:08 +03003310 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311}
3312
Eli Bendersky52467b12012-06-01 07:13:08 +03003313static int
3314xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3315{
3316 Py_VISIT(self->handle_close);
3317 Py_VISIT(self->handle_pi);
3318 Py_VISIT(self->handle_comment);
3319 Py_VISIT(self->handle_end);
3320 Py_VISIT(self->handle_data);
3321 Py_VISIT(self->handle_start);
3322
3323 Py_VISIT(self->target);
3324 Py_VISIT(self->entity);
3325 Py_VISIT(self->names);
3326
3327 return 0;
3328}
3329
3330static int
3331xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332{
3333 EXPAT(ParserFree)(self->parser);
3334
Antoine Pitrouc1948842012-10-01 23:40:37 +02003335 Py_CLEAR(self->handle_close);
3336 Py_CLEAR(self->handle_pi);
3337 Py_CLEAR(self->handle_comment);
3338 Py_CLEAR(self->handle_end);
3339 Py_CLEAR(self->handle_data);
3340 Py_CLEAR(self->handle_start);
3341 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342
Antoine Pitrouc1948842012-10-01 23:40:37 +02003343 Py_CLEAR(self->target);
3344 Py_CLEAR(self->entity);
3345 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346
Eli Bendersky52467b12012-06-01 07:13:08 +03003347 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348}
3349
Eli Bendersky52467b12012-06-01 07:13:08 +03003350static void
3351xmlparser_dealloc(XMLParserObject* self)
3352{
3353 PyObject_GC_UnTrack(self);
3354 xmlparser_gc_clear(self);
3355 Py_TYPE(self)->tp_free((PyObject *)self);
3356}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357
3358LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003359expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360{
3361 int ok;
3362
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003363 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3365
3366 if (PyErr_Occurred())
3367 return NULL;
3368
3369 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003370 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003371 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003373 EXPAT(GetErrorColumnNumber)(self->parser),
3374 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375 );
3376 return NULL;
3377 }
3378
3379 Py_RETURN_NONE;
3380}
3381
Serhiy Storchakacb985562015-05-04 15:32:48 +03003382/*[clinic input]
3383_elementtree.XMLParser.close
3384
3385[clinic start generated code]*/
3386
3387static PyObject *
3388_elementtree_XMLParser_close_impl(XMLParserObject *self)
3389/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390{
3391 /* end feeding data to parser */
3392
3393 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003395 if (!res)
3396 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003398 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 Py_DECREF(res);
3400 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003401 }
3402 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003403 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003404 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003405 }
3406 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003407 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003408 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409}
3410
Serhiy Storchakacb985562015-05-04 15:32:48 +03003411/*[clinic input]
3412_elementtree.XMLParser.feed
3413
3414 data: object
3415 /
3416
3417[clinic start generated code]*/
3418
3419static PyObject *
3420_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3421/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422{
3423 /* feed data to parser */
3424
Serhiy Storchakacb985562015-05-04 15:32:48 +03003425 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003426 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003427 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3428 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003429 return NULL;
3430 if (data_len > INT_MAX) {
3431 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3432 return NULL;
3433 }
3434 /* Explicitly set UTF-8 encoding. Return code ignored. */
3435 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003436 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003437 }
3438 else {
3439 Py_buffer view;
3440 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003441 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003442 return NULL;
3443 if (view.len > INT_MAX) {
3444 PyBuffer_Release(&view);
3445 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3446 return NULL;
3447 }
3448 res = expat_parse(self, view.buf, (int)view.len, 0);
3449 PyBuffer_Release(&view);
3450 return res;
3451 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452}
3453
Serhiy Storchakacb985562015-05-04 15:32:48 +03003454/*[clinic input]
3455_elementtree.XMLParser._parse_whole
3456
3457 file: object
3458 /
3459
3460[clinic start generated code]*/
3461
3462static PyObject *
3463_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3464/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465{
Eli Benderskya3699232013-05-19 18:47:23 -07003466 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003467 PyObject* reader;
3468 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003469 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003470 PyObject* res;
3471
Serhiy Storchakacb985562015-05-04 15:32:48 +03003472 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 if (!reader)
3474 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003475
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 /* read from open file object */
3477 for (;;) {
3478
3479 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3480
3481 if (!buffer) {
3482 /* read failed (e.g. due to KeyboardInterrupt) */
3483 Py_DECREF(reader);
3484 return NULL;
3485 }
3486
Eli Benderskyf996e772012-03-16 05:53:30 +02003487 if (PyUnicode_CheckExact(buffer)) {
3488 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003489 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003490 Py_DECREF(buffer);
3491 break;
3492 }
3493 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003494 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003495 if (!temp) {
3496 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003497 Py_DECREF(reader);
3498 return NULL;
3499 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003500 buffer = temp;
3501 }
3502 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503 Py_DECREF(buffer);
3504 break;
3505 }
3506
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003507 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3508 Py_DECREF(buffer);
3509 Py_DECREF(reader);
3510 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3511 return NULL;
3512 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003513 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003514 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515 );
3516
3517 Py_DECREF(buffer);
3518
3519 if (!res) {
3520 Py_DECREF(reader);
3521 return NULL;
3522 }
3523 Py_DECREF(res);
3524
3525 }
3526
3527 Py_DECREF(reader);
3528
3529 res = expat_parse(self, "", 0, 1);
3530
3531 if (res && TreeBuilder_CheckExact(self->target)) {
3532 Py_DECREF(res);
3533 return treebuilder_done((TreeBuilderObject*) self->target);
3534 }
3535
3536 return res;
3537}
3538
Serhiy Storchakacb985562015-05-04 15:32:48 +03003539/*[clinic input]
3540_elementtree.XMLParser.doctype
3541
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003542 name: object
3543 pubid: object
3544 system: object
3545 /
3546
Serhiy Storchakacb985562015-05-04 15:32:48 +03003547[clinic start generated code]*/
3548
3549static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003550_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3551 PyObject *pubid, PyObject *system)
3552/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003553{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003554 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3555 "This method of XMLParser is deprecated. Define"
3556 " doctype() method on the TreeBuilder target.",
3557 1) < 0) {
3558 return NULL;
3559 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003560 Py_RETURN_NONE;
3561}
3562
Serhiy Storchakacb985562015-05-04 15:32:48 +03003563/*[clinic input]
3564_elementtree.XMLParser._setevents
3565
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003566 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003567 events_to_report: object = None
3568 /
3569
3570[clinic start generated code]*/
3571
3572static PyObject *
3573_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3574 PyObject *events_queue,
3575 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003576/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577{
3578 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003579 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003580 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003581 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582
3583 if (!TreeBuilder_CheckExact(self->target)) {
3584 PyErr_SetString(
3585 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003586 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587 "targets"
3588 );
3589 return NULL;
3590 }
3591
3592 target = (TreeBuilderObject*) self->target;
3593
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003594 events_append = PyObject_GetAttrString(events_queue, "append");
3595 if (events_append == NULL)
3596 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003597 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598
3599 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003600 Py_CLEAR(target->start_event_obj);
3601 Py_CLEAR(target->end_event_obj);
3602 Py_CLEAR(target->start_ns_event_obj);
3603 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003605 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003607 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608 Py_RETURN_NONE;
3609 }
3610
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003611 if (!(events_seq = PySequence_Fast(events_to_report,
3612 "events must be a sequence"))) {
3613 return NULL;
3614 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003615
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003616 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003617 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3618 char *event_name = NULL;
3619 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003620 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003621 } else if (PyBytes_Check(event_name_obj)) {
3622 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003623 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003624 if (event_name == NULL) {
3625 Py_DECREF(events_seq);
3626 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3627 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003628 }
3629
3630 Py_INCREF(event_name_obj);
3631 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003632 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003633 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003634 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003635 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003636 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637 EXPAT(SetNamespaceDeclHandler)(
3638 self->parser,
3639 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3640 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3641 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003642 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003643 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 EXPAT(SetNamespaceDeclHandler)(
3645 self->parser,
3646 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3647 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3648 );
3649 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003650 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003651 Py_DECREF(events_seq);
3652 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653 return NULL;
3654 }
3655 }
3656
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003657 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659}
3660
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003661static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003662xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003663{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003664 if (PyUnicode_Check(nameobj)) {
3665 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003666 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003667 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003668 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003669 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003670 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003671 return PyUnicode_FromFormat(
3672 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003674 }
3675 else
3676 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677
Alexander Belopolskye239d232010-12-08 23:31:48 +00003678 Py_INCREF(res);
3679 return res;
3680 }
3681 generic:
3682 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683}
3684
Serhiy Storchakacb985562015-05-04 15:32:48 +03003685#include "clinic/_elementtree.c.h"
3686
3687static PyMethodDef element_methods[] = {
3688
3689 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3690
3691 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3692 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3693
3694 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3695 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3696 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3697
3698 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3699 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3700 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3701 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3702
3703 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3704 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3705 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3706
Victor Stinner84d8baa2016-09-29 22:12:35 +02003707 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_FASTCALL, _elementtree_Element_iter__doc__},
Serhiy Storchakacb985562015-05-04 15:32:48 +03003708 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3709
3710 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3711 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3712
3713 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3714
3715 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3716 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3717 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3718 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3719 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3720
3721 {NULL, NULL}
3722};
3723
3724static PyMappingMethods element_as_mapping = {
3725 (lenfunc) element_length,
3726 (binaryfunc) element_subscr,
3727 (objobjargproc) element_ass_subscr,
3728};
3729
Serhiy Storchakadde08152015-11-25 15:28:13 +02003730static PyGetSetDef element_getsetlist[] = {
3731 {"tag",
3732 (getter)element_tag_getter,
3733 (setter)element_tag_setter,
3734 "A string identifying what kind of data this element represents"},
3735 {"text",
3736 (getter)element_text_getter,
3737 (setter)element_text_setter,
3738 "A string of text directly after the start tag, or None"},
3739 {"tail",
3740 (getter)element_tail_getter,
3741 (setter)element_tail_setter,
3742 "A string of text directly after the end tag, or None"},
3743 {"attrib",
3744 (getter)element_attrib_getter,
3745 (setter)element_attrib_setter,
3746 "A dictionary containing the element's attributes"},
3747 {NULL},
3748};
3749
Serhiy Storchakacb985562015-05-04 15:32:48 +03003750static PyTypeObject Element_Type = {
3751 PyVarObject_HEAD_INIT(NULL, 0)
3752 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3753 /* methods */
3754 (destructor)element_dealloc, /* tp_dealloc */
3755 0, /* tp_print */
3756 0, /* tp_getattr */
3757 0, /* tp_setattr */
3758 0, /* tp_reserved */
3759 (reprfunc)element_repr, /* tp_repr */
3760 0, /* tp_as_number */
3761 &element_as_sequence, /* tp_as_sequence */
3762 &element_as_mapping, /* tp_as_mapping */
3763 0, /* tp_hash */
3764 0, /* tp_call */
3765 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003766 PyObject_GenericGetAttr, /* tp_getattro */
3767 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003768 0, /* tp_as_buffer */
3769 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3770 /* tp_flags */
3771 0, /* tp_doc */
3772 (traverseproc)element_gc_traverse, /* tp_traverse */
3773 (inquiry)element_gc_clear, /* tp_clear */
3774 0, /* tp_richcompare */
3775 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3776 0, /* tp_iter */
3777 0, /* tp_iternext */
3778 element_methods, /* tp_methods */
3779 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003780 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003781 0, /* tp_base */
3782 0, /* tp_dict */
3783 0, /* tp_descr_get */
3784 0, /* tp_descr_set */
3785 0, /* tp_dictoffset */
3786 (initproc)element_init, /* tp_init */
3787 PyType_GenericAlloc, /* tp_alloc */
3788 element_new, /* tp_new */
3789 0, /* tp_free */
3790};
3791
3792static PyMethodDef treebuilder_methods[] = {
3793 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3794 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3795 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3796 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3797 {NULL, NULL}
3798};
3799
3800static PyTypeObject TreeBuilder_Type = {
3801 PyVarObject_HEAD_INIT(NULL, 0)
3802 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3803 /* methods */
3804 (destructor)treebuilder_dealloc, /* tp_dealloc */
3805 0, /* tp_print */
3806 0, /* tp_getattr */
3807 0, /* tp_setattr */
3808 0, /* tp_reserved */
3809 0, /* tp_repr */
3810 0, /* tp_as_number */
3811 0, /* tp_as_sequence */
3812 0, /* tp_as_mapping */
3813 0, /* tp_hash */
3814 0, /* tp_call */
3815 0, /* tp_str */
3816 0, /* tp_getattro */
3817 0, /* tp_setattro */
3818 0, /* tp_as_buffer */
3819 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3820 /* tp_flags */
3821 0, /* tp_doc */
3822 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3823 (inquiry)treebuilder_gc_clear, /* tp_clear */
3824 0, /* tp_richcompare */
3825 0, /* tp_weaklistoffset */
3826 0, /* tp_iter */
3827 0, /* tp_iternext */
3828 treebuilder_methods, /* tp_methods */
3829 0, /* tp_members */
3830 0, /* tp_getset */
3831 0, /* tp_base */
3832 0, /* tp_dict */
3833 0, /* tp_descr_get */
3834 0, /* tp_descr_set */
3835 0, /* tp_dictoffset */
3836 _elementtree_TreeBuilder___init__, /* tp_init */
3837 PyType_GenericAlloc, /* tp_alloc */
3838 treebuilder_new, /* tp_new */
3839 0, /* tp_free */
3840};
3841
3842static PyMethodDef xmlparser_methods[] = {
3843 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3844 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3845 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3846 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3847 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3848 {NULL, NULL}
3849};
3850
Neal Norwitz227b5332006-03-22 09:28:35 +00003851static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003852 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003853 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003854 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003855 (destructor)xmlparser_dealloc, /* tp_dealloc */
3856 0, /* tp_print */
3857 0, /* tp_getattr */
3858 0, /* tp_setattr */
3859 0, /* tp_reserved */
3860 0, /* tp_repr */
3861 0, /* tp_as_number */
3862 0, /* tp_as_sequence */
3863 0, /* tp_as_mapping */
3864 0, /* tp_hash */
3865 0, /* tp_call */
3866 0, /* tp_str */
3867 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3868 0, /* tp_setattro */
3869 0, /* tp_as_buffer */
3870 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3871 /* tp_flags */
3872 0, /* tp_doc */
3873 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3874 (inquiry)xmlparser_gc_clear, /* tp_clear */
3875 0, /* tp_richcompare */
3876 0, /* tp_weaklistoffset */
3877 0, /* tp_iter */
3878 0, /* tp_iternext */
3879 xmlparser_methods, /* tp_methods */
3880 0, /* tp_members */
3881 0, /* tp_getset */
3882 0, /* tp_base */
3883 0, /* tp_dict */
3884 0, /* tp_descr_get */
3885 0, /* tp_descr_set */
3886 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003887 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003888 PyType_GenericAlloc, /* tp_alloc */
3889 xmlparser_new, /* tp_new */
3890 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003891};
3892
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003893/* ==================================================================== */
3894/* python module interface */
3895
3896static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003897 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003898 {NULL, NULL}
3899};
3900
Martin v. Löwis1a214512008-06-11 05:26:20 +00003901
Eli Bendersky532d03e2013-08-10 08:00:39 -07003902static struct PyModuleDef elementtreemodule = {
3903 PyModuleDef_HEAD_INIT,
3904 "_elementtree",
3905 NULL,
3906 sizeof(elementtreestate),
3907 _functions,
3908 NULL,
3909 elementtree_traverse,
3910 elementtree_clear,
3911 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003912};
3913
Neal Norwitzf6657e62006-12-28 04:47:50 +00003914PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003915PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003916{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003917 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003918 elementtreestate *st;
3919
3920 m = PyState_FindModule(&elementtreemodule);
3921 if (m) {
3922 Py_INCREF(m);
3923 return m;
3924 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003925
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003926 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003927 if (PyType_Ready(&ElementIter_Type) < 0)
3928 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003929 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003930 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003931 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003932 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003933 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003934 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003935
Eli Bendersky532d03e2013-08-10 08:00:39 -07003936 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003937 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003938 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003939 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003940
Eli Bendersky828efde2012-04-05 05:40:58 +03003941 if (!(temp = PyImport_ImportModule("copy")))
3942 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003943 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003944 Py_XDECREF(temp);
3945
Eli Bendersky532d03e2013-08-10 08:00:39 -07003946 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003947 return NULL;
3948
Eli Bendersky20d41742012-06-01 09:48:37 +03003949 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003950 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3951 if (expat_capi) {
3952 /* check that it's usable */
3953 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003954 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003955 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3956 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003957 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003958 PyErr_SetString(PyExc_ImportError,
3959 "pyexpat version is incompatible");
3960 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003961 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003962 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003963 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003964 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003965
Eli Bendersky532d03e2013-08-10 08:00:39 -07003966 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003967 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003968 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003969 Py_INCREF(st->parseerror_obj);
3970 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003971
Eli Bendersky092af1f2012-03-04 07:14:03 +02003972 Py_INCREF((PyObject *)&Element_Type);
3973 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3974
Eli Bendersky58d548d2012-05-29 15:45:16 +03003975 Py_INCREF((PyObject *)&TreeBuilder_Type);
3976 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3977
Eli Bendersky52467b12012-06-01 07:13:08 +03003978 Py_INCREF((PyObject *)&XMLParser_Type);
3979 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003980
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003981 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003982}