blob: 8678c0a3701f2e91eaf77a6685d4abb0daa8df19 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots stored inline in an element's extra block;
   up to this many children need no separate buffer allocation. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Switch the "#if 0" below to "#if 1" to
   print a running byte total on every ALLOC/RELEASE; otherwise both
   macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* LOCAL(type) introduces a file-private function returning `type`.
   With MSVC it additionally asks for inlining and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The lowest bit of the text/tail object pointers carries a 'join'
   flag, so every use of those fields as object pointers has to go
   through JOIN_OBJ.  See the comments in the ElementObject definition
   for what the flag means.

   JOIN_GET(p)       -> the flag bit of p (0 or 1)
   JOIN_SET(p, flag) -> p with its flag bit replaced by `flag`
   JOIN_OBJ(p)       -> p with the flag bit cleared, as PyObject*   */
#define JOIN_GET(p) \
    ((uintptr_t) (p) & (uintptr_t)1)
#define JOIN_SET(p, flag) \
    ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) \
    ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True iff `op` is an instance of exactly Element_Type (subclasses excluded). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300400 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
511 PyObject* res = self->text;
512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
516 res = list_join(res);
517 if (!res)
518 return NULL;
519 self->text = res;
520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
531 PyObject* res = self->tail;
532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
536 res = list_join(res);
537 if (!res)
538 return NULL;
539 self->tail = res;
540 }
541 }
542
543 return res;
544}
545
546static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300547subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548{
549 PyObject* elem;
550
551 ElementObject* parent;
552 PyObject* tag;
553 PyObject* attrib = NULL;
554 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
555 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800556 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (attrib) {
561 /* attrib passed as positional arg */
562 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 if (!attrib)
564 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300565 if (kwds) {
566 if (PyDict_Update(attrib, kwds) < 0) {
567 return NULL;
568 }
569 }
570 } else if (kwds) {
571 /* have keyword args */
572 attrib = get_attrib_from_keywords(kwds);
573 if (!attrib)
574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300576 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 Py_INCREF(Py_None);
578 attrib = Py_None;
579 }
580
Eli Bendersky092af1f2012-03-04 07:14:03 +0200581 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200583 if (elem == NULL)
584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000586 if (element_add_subelement(parent, elem) < 0) {
587 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590
591 return elem;
592}
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594static int
595element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
596{
597 Py_VISIT(self->tag);
598 Py_VISIT(JOIN_OBJ(self->text));
599 Py_VISIT(JOIN_OBJ(self->tail));
600
601 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200602 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300603 Py_VISIT(self->extra->attrib);
604
605 for (i = 0; i < self->extra->length; ++i)
606 Py_VISIT(self->extra->children[i]);
607 }
608 return 0;
609}
610
611static int
612element_gc_clear(ElementObject *self)
613{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700615 _clear_joined_ptr(&self->text);
616 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300617
618 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300619 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 return 0;
623}
624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625static void
626element_dealloc(ElementObject* self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300629
630 if (self->weakreflist != NULL)
631 PyObject_ClearWeakRefs((PyObject *) self);
632
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 /* element_gc_clear clears all references and deallocates extra
634 */
635 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000636
637 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200638 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639}
640
641/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
Serhiy Storchakacb985562015-05-04 15:32:48 +0300643/*[clinic input]
644_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000645
Serhiy Storchakacb985562015-05-04 15:32:48 +0300646 subelement: object(subclass_of='&Element_Type')
647 /
648
649[clinic start generated code]*/
650
651static PyObject *
652_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
653/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
654{
655 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656 return NULL;
657
658 Py_RETURN_NONE;
659}
660
Serhiy Storchakacb985562015-05-04 15:32:48 +0300661/*[clinic input]
662_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
Serhiy Storchakacb985562015-05-04 15:32:48 +0300664[clinic start generated code]*/
665
666static PyObject *
667_elementtree_Element_clear_impl(ElementObject *self)
668/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
669{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
672 Py_INCREF(Py_None);
673 Py_DECREF(JOIN_OBJ(self->text));
674 self->text = Py_None;
675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->tail));
678 self->tail = Py_None;
679
680 Py_RETURN_NONE;
681}
682
Serhiy Storchakacb985562015-05-04 15:32:48 +0300683/*[clinic input]
684_elementtree.Element.__copy__
685
686[clinic start generated code]*/
687
688static PyObject *
689_elementtree_Element___copy___impl(ElementObject *self)
690/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200692 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 ElementObject* element;
694
Eli Bendersky092af1f2012-03-04 07:14:03 +0200695 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800696 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 if (!element)
698 return NULL;
699
700 Py_DECREF(JOIN_OBJ(element->text));
701 element->text = self->text;
702 Py_INCREF(JOIN_OBJ(element->text));
703
704 Py_DECREF(JOIN_OBJ(element->tail));
705 element->tail = self->tail;
706 Py_INCREF(JOIN_OBJ(element->tail));
707
708 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000709 if (element_resize(element, self->extra->length) < 0) {
710 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 for (i = 0; i < self->extra->length; i++) {
715 Py_INCREF(self->extra->children[i]);
716 element->extra->children[i] = self->extra->children[i];
717 }
718
719 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720 }
721
722 return (PyObject*) element;
723}
724
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200725/* Helper for a deep copy. */
726LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
727
Serhiy Storchakacb985562015-05-04 15:32:48 +0300728/*[clinic input]
729_elementtree.Element.__deepcopy__
730
731 memo: object
732 /
733
734[clinic start generated code]*/
735
736static PyObject *
737_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
738/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200740 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741 ElementObject* element;
742 PyObject* tag;
743 PyObject* attrib;
744 PyObject* text;
745 PyObject* tail;
746 PyObject* id;
747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 tag = deepcopy(self->tag, memo);
749 if (!tag)
750 return NULL;
751
752 if (self->extra) {
753 attrib = deepcopy(self->extra->attrib, memo);
754 if (!attrib) {
755 Py_DECREF(tag);
756 return NULL;
757 }
758 } else {
759 Py_INCREF(Py_None);
760 attrib = Py_None;
761 }
762
Eli Bendersky092af1f2012-03-04 07:14:03 +0200763 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764
765 Py_DECREF(tag);
766 Py_DECREF(attrib);
767
768 if (!element)
769 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 text = deepcopy(JOIN_OBJ(self->text), memo);
772 if (!text)
773 goto error;
774 Py_DECREF(element->text);
775 element->text = JOIN_SET(text, JOIN_GET(self->text));
776
777 tail = deepcopy(JOIN_OBJ(self->tail), memo);
778 if (!tail)
779 goto error;
780 Py_DECREF(element->tail);
781 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
782
783 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 if (element_resize(element, self->extra->length) < 0)
785 goto error;
786
787 for (i = 0; i < self->extra->length; i++) {
788 PyObject* child = deepcopy(self->extra->children[i], memo);
789 if (!child) {
790 element->extra->length = i;
791 goto error;
792 }
793 element->extra->children[i] = child;
794 }
795
796 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 }
798
799 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700800 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000801 if (!id)
802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 i = PyDict_SetItem(memo, id, (PyObject*) element);
805
806 Py_DECREF(id);
807
808 if (i < 0)
809 goto error;
810
811 return (PyObject*) element;
812
813 error:
814 Py_DECREF(element);
815 return NULL;
816}
817
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200818LOCAL(PyObject *)
819deepcopy(PyObject *object, PyObject *memo)
820{
821 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200822 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200823 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200824
825 /* Fast paths */
826 if (object == Py_None || PyUnicode_CheckExact(object)) {
827 Py_INCREF(object);
828 return object;
829 }
830
831 if (Py_REFCNT(object) == 1) {
832 if (PyDict_CheckExact(object)) {
833 PyObject *key, *value;
834 Py_ssize_t pos = 0;
835 int simple = 1;
836 while (PyDict_Next(object, &pos, &key, &value)) {
837 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
838 simple = 0;
839 break;
840 }
841 }
842 if (simple)
843 return PyDict_Copy(object);
844 /* Fall through to general case */
845 }
846 else if (Element_CheckExact(object)) {
847 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
848 }
849 }
850
851 /* General case */
852 st = ET_STATE_GLOBAL;
853 if (!st->deepcopy_obj) {
854 PyErr_SetString(PyExc_RuntimeError,
855 "deepcopy helper not found");
856 return NULL;
857 }
858
Victor Stinner7fbac452016-08-20 01:34:44 +0200859 stack[0] = object;
860 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200861 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200862}
863
864
Serhiy Storchakacb985562015-05-04 15:32:48 +0300865/*[clinic input]
866_elementtree.Element.__sizeof__ -> Py_ssize_t
867
868[clinic start generated code]*/
869
870static Py_ssize_t
871_elementtree_Element___sizeof___impl(ElementObject *self)
872/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200873{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200874 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200875 if (self->extra) {
876 result += sizeof(ElementObjectExtra);
877 if (self->extra->children != self->extra->_children)
878 result += sizeof(PyObject*) * self->extra->allocated;
879 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300880 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200881}
882
Eli Bendersky698bdb22013-01-10 06:01:06 -0800883/* dict keys for getstate/setstate. */
884#define PICKLED_TAG "tag"
885#define PICKLED_CHILDREN "_children"
886#define PICKLED_ATTRIB "attrib"
887#define PICKLED_TAIL "tail"
888#define PICKLED_TEXT "text"
889
890/* __getstate__ returns a fabricated instance dict as in the pure-Python
891 * Element implementation, for interoperability/interchangeability. This
892 * makes the pure-Python implementation details an API, but (a) there aren't
893 * any unnecessary structures there; and (b) it buys compatibility with 3.2
894 * pickles. See issue #16076.
895 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300896/*[clinic input]
897_elementtree.Element.__getstate__
898
899[clinic start generated code]*/
900
Eli Bendersky698bdb22013-01-10 06:01:06 -0800901static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300902_elementtree_Element___getstate___impl(ElementObject *self)
903/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800904{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200905 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800906 PyObject *instancedict = NULL, *children;
907
908 /* Build a list of children. */
909 children = PyList_New(self->extra ? self->extra->length : 0);
910 if (!children)
911 return NULL;
912 for (i = 0; i < PyList_GET_SIZE(children); i++) {
913 PyObject *child = self->extra->children[i];
914 Py_INCREF(child);
915 PyList_SET_ITEM(children, i, child);
916 }
917
918 /* Construct the state object. */
919 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
920 if (noattrib)
921 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
922 PICKLED_TAG, self->tag,
923 PICKLED_CHILDREN, children,
924 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700925 PICKLED_TEXT, JOIN_OBJ(self->text),
926 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800927 else
928 instancedict = Py_BuildValue("{sOsOsOsOsO}",
929 PICKLED_TAG, self->tag,
930 PICKLED_CHILDREN, children,
931 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700932 PICKLED_TEXT, JOIN_OBJ(self->text),
933 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800934 if (instancedict) {
935 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800936 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800937 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800938 else {
939 for (i = 0; i < PyList_GET_SIZE(children); i++)
940 Py_DECREF(PyList_GET_ITEM(children, i));
941 Py_DECREF(children);
942
943 return NULL;
944 }
945}
946
947static PyObject *
948element_setstate_from_attributes(ElementObject *self,
949 PyObject *tag,
950 PyObject *attrib,
951 PyObject *text,
952 PyObject *tail,
953 PyObject *children)
954{
955 Py_ssize_t i, nchildren;
956
957 if (!tag) {
958 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
959 return NULL;
960 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800961
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200962 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300963 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800964
Eli Benderskydd3661e2013-09-13 06:24:25 -0700965 _clear_joined_ptr(&self->text);
966 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
967 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->tail);
970 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
973 /* Handle ATTRIB and CHILDREN. */
974 if (!children && !attrib)
975 Py_RETURN_NONE;
976
977 /* Compute 'nchildren'. */
978 if (children) {
979 if (!PyList_Check(children)) {
980 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
981 return NULL;
982 }
983 nchildren = PyList_Size(children);
984 }
985 else {
986 nchildren = 0;
987 }
988
989 /* Allocate 'extra'. */
990 if (element_resize(self, nchildren)) {
991 return NULL;
992 }
993 assert(self->extra && self->extra->allocated >= nchildren);
994
995 /* Copy children */
996 for (i = 0; i < nchildren; i++) {
997 self->extra->children[i] = PyList_GET_ITEM(children, i);
998 Py_INCREF(self->extra->children[i]);
999 }
1000
1001 self->extra->length = nchildren;
1002 self->extra->allocated = nchildren;
1003
1004 /* Stash attrib. */
1005 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001006 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001007 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001008 }
1009
1010 Py_RETURN_NONE;
1011}
1012
1013/* __setstate__ for Element instance from the Python implementation.
1014 * 'state' should be the instance dict.
1015 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001016
Eli Bendersky698bdb22013-01-10 06:01:06 -08001017static PyObject *
1018element_setstate_from_Python(ElementObject *self, PyObject *state)
1019{
1020 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1021 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1022 PyObject *args;
1023 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001024 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001025
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026 tag = attrib = text = tail = children = NULL;
1027 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030
1031 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1032 &attrib, &text, &tail, &children))
1033 retval = element_setstate_from_attributes(self, tag, attrib, text,
1034 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001035 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001036 retval = NULL;
1037
1038 Py_DECREF(args);
1039 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001040}
1041
Serhiy Storchakacb985562015-05-04 15:32:48 +03001042/*[clinic input]
1043_elementtree.Element.__setstate__
1044
1045 state: object
1046 /
1047
1048[clinic start generated code]*/
1049
Eli Bendersky698bdb22013-01-10 06:01:06 -08001050static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001051_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1052/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001053{
1054 if (!PyDict_CheckExact(state)) {
1055 PyErr_Format(PyExc_TypeError,
1056 "Don't know how to unpickle \"%.200R\" as an Element",
1057 state);
1058 return NULL;
1059 }
1060 else
1061 return element_setstate_from_Python(self, state);
1062}
1063
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064LOCAL(int)
1065checkpath(PyObject* tag)
1066{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001067 Py_ssize_t i;
1068 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069
1070 /* check if a tag contains an xpath character */
1071
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001072#define PATHCHAR(ch) \
1073 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001074
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001076 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1077 void *data = PyUnicode_DATA(tag);
1078 unsigned int kind = PyUnicode_KIND(tag);
1079 for (i = 0; i < len; i++) {
1080 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1081 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001082 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001083 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001084 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 return 1;
1087 }
1088 return 0;
1089 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001090 if (PyBytes_Check(tag)) {
1091 char *p = PyBytes_AS_STRING(tag);
1092 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001093 if (p[i] == '{')
1094 check = 0;
1095 else if (p[i] == '}')
1096 check = 1;
1097 else if (check && PATHCHAR(p[i]))
1098 return 1;
1099 }
1100 return 0;
1101 }
1102
1103 return 1; /* unknown type; might be path expression */
1104}
1105
Serhiy Storchakacb985562015-05-04 15:32:48 +03001106/*[clinic input]
1107_elementtree.Element.extend
1108
1109 elements: object
1110 /
1111
1112[clinic start generated code]*/
1113
1114static PyObject *
1115_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1116/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001117{
1118 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001119 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001120
Serhiy Storchakacb985562015-05-04 15:32:48 +03001121 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122 if (!seq) {
1123 PyErr_Format(
1124 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 );
1127 return NULL;
1128 }
1129
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001130 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001131 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001132 Py_INCREF(element);
1133 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001134 PyErr_Format(
1135 PyExc_TypeError,
1136 "expected an Element, not \"%.200s\"",
1137 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001138 Py_DECREF(seq);
1139 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001140 return NULL;
1141 }
1142
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001143 if (element_add_subelement(self, element) < 0) {
1144 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001145 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001146 return NULL;
1147 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001148 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 }
1150
1151 Py_DECREF(seq);
1152
1153 Py_RETURN_NONE;
1154}
1155
Serhiy Storchakacb985562015-05-04 15:32:48 +03001156/*[clinic input]
1157_elementtree.Element.find
1158
1159 path: object
1160 namespaces: object = None
1161
1162[clinic start generated code]*/
1163
1164static PyObject *
1165_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1166 PyObject *namespaces)
1167/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001169 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001170 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001171
Serhiy Storchakacb985562015-05-04 15:32:48 +03001172 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001173 _Py_IDENTIFIER(find);
Victor Stinnerf5616342016-12-09 15:26:00 +01001174 return _PyObject_CallMethodIdObjArgs(
1175 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001177 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001178
1179 if (!self->extra)
1180 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001181
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 for (i = 0; i < self->extra->length; i++) {
1183 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001184 int rc;
1185 if (!Element_CheckExact(item))
1186 continue;
1187 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001188 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001189 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001190 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001191 Py_DECREF(item);
1192 if (rc < 0)
1193 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 }
1195
1196 Py_RETURN_NONE;
1197}
1198
Serhiy Storchakacb985562015-05-04 15:32:48 +03001199/*[clinic input]
1200_elementtree.Element.findtext
1201
1202 path: object
1203 default: object = None
1204 namespaces: object = None
1205
1206[clinic start generated code]*/
1207
1208static PyObject *
1209_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1210 PyObject *default_value,
1211 PyObject *namespaces)
1212/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001213{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001214 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001215 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001216 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001217
Serhiy Storchakacb985562015-05-04 15:32:48 +03001218 if (checkpath(path) || namespaces != Py_None)
Victor Stinnerf5616342016-12-09 15:26:00 +01001219 return _PyObject_CallMethodIdObjArgs(
1220 st->elementpath_obj, &PyId_findtext,
1221 self, path, default_value, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001222 );
1223
1224 if (!self->extra) {
1225 Py_INCREF(default_value);
1226 return default_value;
1227 }
1228
1229 for (i = 0; i < self->extra->length; i++) {
1230 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001231 int rc;
1232 if (!Element_CheckExact(item))
1233 continue;
1234 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001235 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001236 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001237 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001238 if (text == Py_None) {
1239 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001240 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001242 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 return text;
1245 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 Py_DECREF(item);
1247 if (rc < 0)
1248 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 }
1250
1251 Py_INCREF(default_value);
1252 return default_value;
1253}
1254
Serhiy Storchakacb985562015-05-04 15:32:48 +03001255/*[clinic input]
1256_elementtree.Element.findall
1257
1258 path: object
1259 namespaces: object = None
1260
1261[clinic start generated code]*/
1262
1263static PyObject *
1264_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1265 PyObject *namespaces)
1266/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001267{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001268 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001269 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001270 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001271 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001272
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001273 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001274 _Py_IDENTIFIER(findall);
Victor Stinnerf5616342016-12-09 15:26:00 +01001275 return _PyObject_CallMethodIdObjArgs(
1276 st->elementpath_obj, &PyId_findall, self, tag, namespaces, NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001279
1280 out = PyList_New(0);
1281 if (!out)
1282 return NULL;
1283
1284 if (!self->extra)
1285 return out;
1286
1287 for (i = 0; i < self->extra->length; i++) {
1288 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001289 int rc;
1290 if (!Element_CheckExact(item))
1291 continue;
1292 Py_INCREF(item);
1293 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1294 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1295 Py_DECREF(item);
1296 Py_DECREF(out);
1297 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001298 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001299 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001300 }
1301
1302 return out;
1303}
1304
Serhiy Storchakacb985562015-05-04 15:32:48 +03001305/*[clinic input]
1306_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308 path: object
1309 namespaces: object = None
1310
1311[clinic start generated code]*/
1312
1313static PyObject *
1314_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1315 PyObject *namespaces)
1316/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1317{
1318 PyObject* tag = path;
1319 _Py_IDENTIFIER(iterfind);
1320 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001321
Victor Stinnerf5616342016-12-09 15:26:00 +01001322 return _PyObject_CallMethodIdObjArgs(
1323 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324}
1325
Serhiy Storchakacb985562015-05-04 15:32:48 +03001326/*[clinic input]
1327_elementtree.Element.get
1328
1329 key: object
1330 default: object = None
1331
1332[clinic start generated code]*/
1333
1334static PyObject *
1335_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1336 PyObject *default_value)
1337/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338{
1339 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340
1341 if (!self->extra || self->extra->attrib == Py_None)
1342 value = default_value;
1343 else {
1344 value = PyDict_GetItem(self->extra->attrib, key);
1345 if (!value)
1346 value = default_value;
1347 }
1348
1349 Py_INCREF(value);
1350 return value;
1351}
1352
Serhiy Storchakacb985562015-05-04 15:32:48 +03001353/*[clinic input]
1354_elementtree.Element.getchildren
1355
1356[clinic start generated code]*/
1357
1358static PyObject *
1359_elementtree_Element_getchildren_impl(ElementObject *self)
1360/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001362 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001363 PyObject* list;
1364
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001365 /* FIXME: report as deprecated? */
1366
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 if (!self->extra)
1368 return PyList_New(0);
1369
1370 list = PyList_New(self->extra->length);
1371 if (!list)
1372 return NULL;
1373
1374 for (i = 0; i < self->extra->length; i++) {
1375 PyObject* item = self->extra->children[i];
1376 Py_INCREF(item);
1377 PyList_SET_ITEM(list, i, item);
1378 }
1379
1380 return list;
1381}
1382
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001383
Eli Bendersky64d11e62012-06-15 07:42:50 +03001384static PyObject *
1385create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1386
1387
Serhiy Storchakacb985562015-05-04 15:32:48 +03001388/*[clinic input]
1389_elementtree.Element.iter
1390
1391 tag: object = None
1392
1393[clinic start generated code]*/
1394
Eli Bendersky64d11e62012-06-15 07:42:50 +03001395static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001396_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1397/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001398{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001399 if (PyUnicode_Check(tag)) {
1400 if (PyUnicode_READY(tag) < 0)
1401 return NULL;
1402 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1403 tag = Py_None;
1404 }
1405 else if (PyBytes_Check(tag)) {
1406 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1407 tag = Py_None;
1408 }
1409
Eli Bendersky64d11e62012-06-15 07:42:50 +03001410 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001411}
1412
1413
Serhiy Storchakacb985562015-05-04 15:32:48 +03001414/*[clinic input]
1415_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001416
Serhiy Storchakacb985562015-05-04 15:32:48 +03001417[clinic start generated code]*/
1418
1419static PyObject *
1420_elementtree_Element_itertext_impl(ElementObject *self)
1421/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1422{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001423 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001424}
1425
Eli Bendersky64d11e62012-06-15 07:42:50 +03001426
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001428element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001430 ElementObject* self = (ElementObject*) self_;
1431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432 if (!self->extra || index < 0 || index >= self->extra->length) {
1433 PyErr_SetString(
1434 PyExc_IndexError,
1435 "child index out of range"
1436 );
1437 return NULL;
1438 }
1439
1440 Py_INCREF(self->extra->children[index]);
1441 return self->extra->children[index];
1442}
1443
Serhiy Storchakacb985562015-05-04 15:32:48 +03001444/*[clinic input]
1445_elementtree.Element.insert
1446
1447 index: Py_ssize_t
1448 subelement: object(subclass_of='&Element_Type')
1449 /
1450
1451[clinic start generated code]*/
1452
1453static PyObject *
1454_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1455 PyObject *subelement)
1456/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001457{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001458 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001459
Victor Stinner5f0af232013-07-11 23:01:36 +02001460 if (!self->extra) {
1461 if (create_extra(self, NULL) < 0)
1462 return NULL;
1463 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001465 if (index < 0) {
1466 index += self->extra->length;
1467 if (index < 0)
1468 index = 0;
1469 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001470 if (index > self->extra->length)
1471 index = self->extra->length;
1472
1473 if (element_resize(self, 1) < 0)
1474 return NULL;
1475
1476 for (i = self->extra->length; i > index; i--)
1477 self->extra->children[i] = self->extra->children[i-1];
1478
Serhiy Storchakacb985562015-05-04 15:32:48 +03001479 Py_INCREF(subelement);
1480 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481
1482 self->extra->length++;
1483
1484 Py_RETURN_NONE;
1485}
1486
Serhiy Storchakacb985562015-05-04 15:32:48 +03001487/*[clinic input]
1488_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001489
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490[clinic start generated code]*/
1491
1492static PyObject *
1493_elementtree_Element_items_impl(ElementObject *self)
1494/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1495{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496 if (!self->extra || self->extra->attrib == Py_None)
1497 return PyList_New(0);
1498
1499 return PyDict_Items(self->extra->attrib);
1500}
1501
Serhiy Storchakacb985562015-05-04 15:32:48 +03001502/*[clinic input]
1503_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001504
Serhiy Storchakacb985562015-05-04 15:32:48 +03001505[clinic start generated code]*/
1506
1507static PyObject *
1508_elementtree_Element_keys_impl(ElementObject *self)
1509/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1510{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001511 if (!self->extra || self->extra->attrib == Py_None)
1512 return PyList_New(0);
1513
1514 return PyDict_Keys(self->extra->attrib);
1515}
1516
Martin v. Löwis18e16552006-02-15 17:27:45 +00001517static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518element_length(ElementObject* self)
1519{
1520 if (!self->extra)
1521 return 0;
1522
1523 return self->extra->length;
1524}
1525
Serhiy Storchakacb985562015-05-04 15:32:48 +03001526/*[clinic input]
1527_elementtree.Element.makeelement
1528
1529 tag: object
1530 attrib: object
1531 /
1532
1533[clinic start generated code]*/
1534
1535static PyObject *
1536_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1537 PyObject *attrib)
1538/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539{
1540 PyObject* elem;
1541
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542 attrib = PyDict_Copy(attrib);
1543 if (!attrib)
1544 return NULL;
1545
Eli Bendersky092af1f2012-03-04 07:14:03 +02001546 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547
1548 Py_DECREF(attrib);
1549
1550 return elem;
1551}
1552
Serhiy Storchakacb985562015-05-04 15:32:48 +03001553/*[clinic input]
1554_elementtree.Element.remove
1555
1556 subelement: object(subclass_of='&Element_Type')
1557 /
1558
1559[clinic start generated code]*/
1560
1561static PyObject *
1562_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1563/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001565 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001566 int rc;
1567 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569 if (!self->extra) {
1570 /* element has no children, so raise exception */
1571 PyErr_SetString(
1572 PyExc_ValueError,
1573 "list.remove(x): x not in list"
1574 );
1575 return NULL;
1576 }
1577
1578 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001579 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001581 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001582 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001584 if (rc < 0)
1585 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 }
1587
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001588 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001589 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 PyErr_SetString(
1591 PyExc_ValueError,
1592 "list.remove(x): x not in list"
1593 );
1594 return NULL;
1595 }
1596
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001597 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001598
1599 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001600 for (; i < self->extra->length; i++)
1601 self->extra->children[i] = self->extra->children[i+1];
1602
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001603 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604 Py_RETURN_NONE;
1605}
1606
1607static PyObject*
1608element_repr(ElementObject* self)
1609{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001610 int status;
1611
1612 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001613 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001614
1615 status = Py_ReprEnter((PyObject *)self);
1616 if (status == 0) {
1617 PyObject *res;
1618 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1619 Py_ReprLeave((PyObject *)self);
1620 return res;
1621 }
1622 if (status > 0)
1623 PyErr_Format(PyExc_RuntimeError,
1624 "reentrant call inside %s.__repr__",
1625 Py_TYPE(self)->tp_name);
1626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627}
1628
Serhiy Storchakacb985562015-05-04 15:32:48 +03001629/*[clinic input]
1630_elementtree.Element.set
1631
1632 key: object
1633 value: object
1634 /
1635
1636[clinic start generated code]*/
1637
1638static PyObject *
1639_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1640 PyObject *value)
1641/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642{
1643 PyObject* attrib;
1644
Victor Stinner5f0af232013-07-11 23:01:36 +02001645 if (!self->extra) {
1646 if (create_extra(self, NULL) < 0)
1647 return NULL;
1648 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001649
1650 attrib = element_get_attrib(self);
1651 if (!attrib)
1652 return NULL;
1653
1654 if (PyDict_SetItem(attrib, key, value) < 0)
1655 return NULL;
1656
1657 Py_RETURN_NONE;
1658}
1659
1660static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001661element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001662{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001663 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001664 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665 PyObject* old;
1666
1667 if (!self->extra || index < 0 || index >= self->extra->length) {
1668 PyErr_SetString(
1669 PyExc_IndexError,
1670 "child assignment index out of range");
1671 return -1;
1672 }
1673
1674 old = self->extra->children[index];
1675
1676 if (item) {
1677 Py_INCREF(item);
1678 self->extra->children[index] = item;
1679 } else {
1680 self->extra->length--;
1681 for (i = index; i < self->extra->length; i++)
1682 self->extra->children[i] = self->extra->children[i+1];
1683 }
1684
1685 Py_DECREF(old);
1686
1687 return 0;
1688}
1689
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001690static PyObject*
1691element_subscr(PyObject* self_, PyObject* item)
1692{
1693 ElementObject* self = (ElementObject*) self_;
1694
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001695 if (PyIndex_Check(item)) {
1696 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001697
1698 if (i == -1 && PyErr_Occurred()) {
1699 return NULL;
1700 }
1701 if (i < 0 && self->extra)
1702 i += self->extra->length;
1703 return element_getitem(self_, i);
1704 }
1705 else if (PySlice_Check(item)) {
1706 Py_ssize_t start, stop, step, slicelen, cur, i;
1707 PyObject* list;
1708
1709 if (!self->extra)
1710 return PyList_New(0);
1711
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001712 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001713 self->extra->length,
1714 &start, &stop, &step, &slicelen) < 0) {
1715 return NULL;
1716 }
1717
1718 if (slicelen <= 0)
1719 return PyList_New(0);
1720 else {
1721 list = PyList_New(slicelen);
1722 if (!list)
1723 return NULL;
1724
1725 for (cur = start, i = 0; i < slicelen;
1726 cur += step, i++) {
1727 PyObject* item = self->extra->children[cur];
1728 Py_INCREF(item);
1729 PyList_SET_ITEM(list, i, item);
1730 }
1731
1732 return list;
1733 }
1734 }
1735 else {
1736 PyErr_SetString(PyExc_TypeError,
1737 "element indices must be integers");
1738 return NULL;
1739 }
1740}
1741
1742static int
1743element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1744{
1745 ElementObject* self = (ElementObject*) self_;
1746
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001747 if (PyIndex_Check(item)) {
1748 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001749
1750 if (i == -1 && PyErr_Occurred()) {
1751 return -1;
1752 }
1753 if (i < 0 && self->extra)
1754 i += self->extra->length;
1755 return element_setitem(self_, i, value);
1756 }
1757 else if (PySlice_Check(item)) {
1758 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1759
1760 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001761 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001762
Victor Stinner5f0af232013-07-11 23:01:36 +02001763 if (!self->extra) {
1764 if (create_extra(self, NULL) < 0)
1765 return -1;
1766 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001767
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001768 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769 self->extra->length,
1770 &start, &stop, &step, &slicelen) < 0) {
1771 return -1;
1772 }
1773
Eli Bendersky865756a2012-03-09 13:38:15 +02001774 if (value == NULL) {
1775 /* Delete slice */
1776 size_t cur;
1777 Py_ssize_t i;
1778
1779 if (slicelen <= 0)
1780 return 0;
1781
1782 /* Since we're deleting, the direction of the range doesn't matter,
1783 * so for simplicity make it always ascending.
1784 */
1785 if (step < 0) {
1786 stop = start + 1;
1787 start = stop + step * (slicelen - 1) - 1;
1788 step = -step;
1789 }
1790
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001791 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001792
1793 /* recycle is a list that will contain all the children
1794 * scheduled for removal.
1795 */
1796 if (!(recycle = PyList_New(slicelen))) {
1797 PyErr_NoMemory();
1798 return -1;
1799 }
1800
1801 /* This loop walks over all the children that have to be deleted,
1802 * with cur pointing at them. num_moved is the amount of children
1803 * until the next deleted child that have to be "shifted down" to
1804 * occupy the deleted's places.
1805 * Note that in the ith iteration, shifting is done i+i places down
1806 * because i children were already removed.
1807 */
1808 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1809 /* Compute how many children have to be moved, clipping at the
1810 * list end.
1811 */
1812 Py_ssize_t num_moved = step - 1;
1813 if (cur + step >= (size_t)self->extra->length) {
1814 num_moved = self->extra->length - cur - 1;
1815 }
1816
1817 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1818
1819 memmove(
1820 self->extra->children + cur - i,
1821 self->extra->children + cur + 1,
1822 num_moved * sizeof(PyObject *));
1823 }
1824
1825 /* Leftover "tail" after the last removed child */
1826 cur = start + (size_t)slicelen * step;
1827 if (cur < (size_t)self->extra->length) {
1828 memmove(
1829 self->extra->children + cur - slicelen,
1830 self->extra->children + cur,
1831 (self->extra->length - cur) * sizeof(PyObject *));
1832 }
1833
1834 self->extra->length -= slicelen;
1835
1836 /* Discard the recycle list with all the deleted sub-elements */
1837 Py_XDECREF(recycle);
1838 return 0;
1839 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001840
1841 /* A new slice is actually being assigned */
1842 seq = PySequence_Fast(value, "");
1843 if (!seq) {
1844 PyErr_Format(
1845 PyExc_TypeError,
1846 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1847 );
1848 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001849 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001850 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001851
1852 if (step != 1 && newlen != slicelen)
1853 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001854 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001856 "attempt to assign sequence of size %zd "
1857 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001858 newlen, slicelen
1859 );
1860 return -1;
1861 }
1862
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001863 /* Resize before creating the recycle bin, to prevent refleaks. */
1864 if (newlen > slicelen) {
1865 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001866 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001867 return -1;
1868 }
1869 }
1870
1871 if (slicelen > 0) {
1872 /* to avoid recursive calls to this method (via decref), move
1873 old items to the recycle bin here, and get rid of them when
1874 we're done modifying the element */
1875 recycle = PyList_New(slicelen);
1876 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001877 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001878 return -1;
1879 }
1880 for (cur = start, i = 0; i < slicelen;
1881 cur += step, i++)
1882 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1883 }
1884
1885 if (newlen < slicelen) {
1886 /* delete slice */
1887 for (i = stop; i < self->extra->length; i++)
1888 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1889 } else if (newlen > slicelen) {
1890 /* insert slice */
1891 for (i = self->extra->length-1; i >= stop; i--)
1892 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1893 }
1894
1895 /* replace the slice */
1896 for (cur = start, i = 0; i < newlen;
1897 cur += step, i++) {
1898 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1899 Py_INCREF(element);
1900 self->extra->children[cur] = element;
1901 }
1902
1903 self->extra->length += newlen - slicelen;
1904
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001905 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001906
1907 /* discard the recycle bin, and everything in it */
1908 Py_XDECREF(recycle);
1909
1910 return 0;
1911 }
1912 else {
1913 PyErr_SetString(PyExc_TypeError,
1914 "element indices must be integers");
1915 return -1;
1916 }
1917}
1918
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001920element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001921{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001922 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001923 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924 return res;
1925}
1926
Serhiy Storchakadde08152015-11-25 15:28:13 +02001927static PyObject*
1928element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001929{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001930 PyObject *res = element_get_text(self);
1931 Py_XINCREF(res);
1932 return res;
1933}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001934
Serhiy Storchakadde08152015-11-25 15:28:13 +02001935static PyObject*
1936element_tail_getter(ElementObject *self, void *closure)
1937{
1938 PyObject *res = element_get_tail(self);
1939 Py_XINCREF(res);
1940 return res;
1941}
1942
1943static PyObject*
1944element_attrib_getter(ElementObject *self, void *closure)
1945{
1946 PyObject *res;
1947 if (!self->extra) {
1948 if (create_extra(self, NULL) < 0)
1949 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001950 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001951 res = element_get_attrib(self);
1952 Py_XINCREF(res);
1953 return res;
1954}
Victor Stinner4d463432013-07-11 23:05:03 +02001955
/* Macro for setter validation: a setter is called with a NULL value when the
 * attribute is being deleted, which is not allowed.  In that case raise
 * AttributeError and make the enclosing function return -1.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays safe next to an unbraced if/else.
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1964
Serhiy Storchakadde08152015-11-25 15:28:13 +02001965static int
1966element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1967{
1968 _VALIDATE_ATTR_VALUE(value);
1969 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001970 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001971 return 0;
1972}
1973
1974static int
1975element_text_setter(ElementObject *self, PyObject *value, void *closure)
1976{
1977 _VALIDATE_ATTR_VALUE(value);
1978 Py_INCREF(value);
1979 Py_DECREF(JOIN_OBJ(self->text));
1980 self->text = value;
1981 return 0;
1982}
1983
1984static int
1985element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1986{
1987 _VALIDATE_ATTR_VALUE(value);
1988 Py_INCREF(value);
1989 Py_DECREF(JOIN_OBJ(self->tail));
1990 self->tail = value;
1991 return 0;
1992}
1993
1994static int
1995element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1996{
1997 _VALIDATE_ATTR_VALUE(value);
1998 if (!self->extra) {
1999 if (create_extra(self, NULL) < 0)
2000 return -1;
2001 }
2002 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002003 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002004 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002005}
2006
2007static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002008 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002009 0, /* sq_concat */
2010 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002012 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002013 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002014 0,
2015};
2016
Eli Bendersky64d11e62012-06-15 07:42:50 +03002017/******************************* Element iterator ****************************/
2018
/* ElementIterObject represents the iteration state over an XML element in
 * pre-order traversal. To keep track of which sub-element should be returned
 * next, a stack of parents is maintained. This is a standard stack-based
 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
 * Each stack item contains the saved parent to which we should return after
 * the current one is exhausted, and the next child to examine in that parent.
 */
2027typedef struct ParentLocator_t {
2028 ElementObject *parent;
2029 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002030} ParentLocator;
2031
2032typedef struct {
2033 PyObject_HEAD
2034 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002035 Py_ssize_t parent_stack_used;
2036 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002037 ElementObject *root_element;
2038 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002039 int gettext;
2040} ElementIterObject;
2041
2042
2043static void
2044elementiter_dealloc(ElementIterObject *it)
2045{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002046 Py_ssize_t i = it->parent_stack_used;
2047 it->parent_stack_used = 0;
2048 while (i--)
2049 Py_XDECREF(it->parent_stack[i].parent);
2050 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002051
2052 Py_XDECREF(it->sought_tag);
2053 Py_XDECREF(it->root_element);
2054
2055 PyObject_GC_UnTrack(it);
2056 PyObject_GC_Del(it);
2057}
2058
2059static int
2060elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2061{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002062 Py_ssize_t i = it->parent_stack_used;
2063 while (i--)
2064 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002065
2066 Py_VISIT(it->root_element);
2067 Py_VISIT(it->sought_tag);
2068 return 0;
2069}
2070
2071/* Helper function for elementiter_next. Add a new parent to the parent stack.
2072 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002073static int
2074parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002075{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002076 ParentLocator *item;
2077
2078 if (it->parent_stack_used >= it->parent_stack_size) {
2079 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2080 ParentLocator *parent_stack = it->parent_stack;
2081 PyMem_Resize(parent_stack, ParentLocator, new_size);
2082 if (parent_stack == NULL)
2083 return -1;
2084 it->parent_stack = parent_stack;
2085 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002086 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002087 item = it->parent_stack + it->parent_stack_used++;
2088 Py_INCREF(parent);
2089 item->parent = parent;
2090 item->child_index = 0;
2091 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002092}
2093
2094static PyObject *
2095elementiter_next(ElementIterObject *it)
2096{
2097 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002098 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099 * A short note on gettext: this function serves both the iter() and
2100 * itertext() methods to avoid code duplication. However, there are a few
2101 * small differences in the way these iterations work. Namely:
2102 * - itertext() only yields text from nodes that have it, and continues
2103 * iterating when a node doesn't have text (so it doesn't return any
2104 * node like iter())
2105 * - itertext() also has to handle tail, after finishing with all the
2106 * children of a node.
2107 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002108 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002109 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002110 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111
2112 while (1) {
2113 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002114 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115 * iterator is exhausted.
2116 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002117 if (!it->parent_stack_used) {
2118 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119 PyErr_SetNone(PyExc_StopIteration);
2120 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 }
2122
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002123 elem = it->root_element; /* steals a reference */
2124 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002125 }
2126 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002127 /* See if there are children left to traverse in the current parent. If
2128 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002129 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002130 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2131 Py_ssize_t child_index = item->child_index;
2132 ElementObjectExtra *extra;
2133 elem = item->parent;
2134 extra = elem->extra;
2135 if (!extra || child_index >= extra->length) {
2136 it->parent_stack_used--;
2137 /* Note that extra condition on it->parent_stack_used here;
2138 * this is because itertext() is supposed to only return *inner*
2139 * text, not text following the element it began iteration with.
2140 */
2141 if (it->gettext && it->parent_stack_used) {
2142 text = element_get_tail(elem);
2143 goto gettext;
2144 }
2145 Py_DECREF(elem);
2146 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002147 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002148
2149 elem = (ElementObject *)extra->children[child_index];
2150 item->child_index++;
2151 Py_INCREF(elem);
2152 }
2153
2154 if (parent_stack_push_new(it, elem) < 0) {
2155 Py_DECREF(elem);
2156 PyErr_NoMemory();
2157 return NULL;
2158 }
2159 if (it->gettext) {
2160 text = element_get_text(elem);
2161 goto gettext;
2162 }
2163
2164 if (it->sought_tag == Py_None)
2165 return (PyObject *)elem;
2166
2167 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2168 if (rc > 0)
2169 return (PyObject *)elem;
2170
2171 Py_DECREF(elem);
2172 if (rc < 0)
2173 return NULL;
2174 continue;
2175
2176gettext:
2177 if (!text) {
2178 Py_DECREF(elem);
2179 return NULL;
2180 }
2181 if (text == Py_None) {
2182 Py_DECREF(elem);
2183 }
2184 else {
2185 Py_INCREF(text);
2186 Py_DECREF(elem);
2187 rc = PyObject_IsTrue(text);
2188 if (rc > 0)
2189 return text;
2190 Py_DECREF(text);
2191 if (rc < 0)
2192 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002193 }
2194 }
2195
2196 return NULL;
2197}
2198
2199
2200static PyTypeObject ElementIter_Type = {
2201 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002202 /* Using the module's name since the pure-Python implementation does not
2203 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002204 "_elementtree._element_iterator", /* tp_name */
2205 sizeof(ElementIterObject), /* tp_basicsize */
2206 0, /* tp_itemsize */
2207 /* methods */
2208 (destructor)elementiter_dealloc, /* tp_dealloc */
2209 0, /* tp_print */
2210 0, /* tp_getattr */
2211 0, /* tp_setattr */
2212 0, /* tp_reserved */
2213 0, /* tp_repr */
2214 0, /* tp_as_number */
2215 0, /* tp_as_sequence */
2216 0, /* tp_as_mapping */
2217 0, /* tp_hash */
2218 0, /* tp_call */
2219 0, /* tp_str */
2220 0, /* tp_getattro */
2221 0, /* tp_setattro */
2222 0, /* tp_as_buffer */
2223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2224 0, /* tp_doc */
2225 (traverseproc)elementiter_traverse, /* tp_traverse */
2226 0, /* tp_clear */
2227 0, /* tp_richcompare */
2228 0, /* tp_weaklistoffset */
2229 PyObject_SelfIter, /* tp_iter */
2230 (iternextfunc)elementiter_next, /* tp_iternext */
2231 0, /* tp_methods */
2232 0, /* tp_members */
2233 0, /* tp_getset */
2234 0, /* tp_base */
2235 0, /* tp_dict */
2236 0, /* tp_descr_get */
2237 0, /* tp_descr_set */
2238 0, /* tp_dictoffset */
2239 0, /* tp_init */
2240 0, /* tp_alloc */
2241 0, /* tp_new */
2242};
2243
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002244#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002245
2246static PyObject *
2247create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2248{
2249 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002250
2251 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2252 if (!it)
2253 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002254
Victor Stinner4d463432013-07-11 23:05:03 +02002255 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002256 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002258 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002259 it->root_element = self;
2260
Eli Bendersky64d11e62012-06-15 07:42:50 +03002261 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002262
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002263 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002264 if (it->parent_stack == NULL) {
2265 Py_DECREF(it);
2266 PyErr_NoMemory();
2267 return NULL;
2268 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002269 it->parent_stack_used = 0;
2270 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002271
Eli Bendersky64d11e62012-06-15 07:42:50 +03002272 return (PyObject *)it;
2273}
2274
2275
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276/* ==================================================================== */
2277/* the tree builder type */
2278
2279typedef struct {
2280 PyObject_HEAD
2281
Eli Bendersky58d548d2012-05-29 15:45:16 +03002282 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002283
Antoine Pitrouee329312012-10-04 19:53:29 +02002284 PyObject *this; /* current node */
2285 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286
Eli Bendersky58d548d2012-05-29 15:45:16 +03002287 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288
Eli Bendersky58d548d2012-05-29 15:45:16 +03002289 PyObject *stack; /* element stack */
2290 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291
Eli Bendersky48d358b2012-05-30 17:57:50 +03002292 PyObject *element_factory;
2293
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002294 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002295 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002296 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2297 PyObject *end_event_obj;
2298 PyObject *start_ns_event_obj;
2299 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300} TreeBuilderObject;
2301
Christian Heimes90aa7642007-12-19 02:45:37 +00002302#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002303
2304/* -------------------------------------------------------------------- */
2305/* constructor and destructor */
2306
Eli Bendersky58d548d2012-05-29 15:45:16 +03002307static PyObject *
2308treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002310 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2311 if (t != NULL) {
2312 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002313
Eli Bendersky58d548d2012-05-29 15:45:16 +03002314 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002315 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002317 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002318
Eli Bendersky58d548d2012-05-29 15:45:16 +03002319 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002320 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002321 t->stack = PyList_New(20);
2322 if (!t->stack) {
2323 Py_DECREF(t->this);
2324 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002325 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002326 return NULL;
2327 }
2328 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002329
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002330 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002331 t->start_event_obj = t->end_event_obj = NULL;
2332 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2333 }
2334 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335}
2336
Serhiy Storchakacb985562015-05-04 15:32:48 +03002337/*[clinic input]
2338_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002339
Serhiy Storchakacb985562015-05-04 15:32:48 +03002340 element_factory: object = NULL
2341
2342[clinic start generated code]*/
2343
2344static int
2345_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2346 PyObject *element_factory)
2347/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2348{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002349 if (element_factory) {
2350 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002351 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002352 }
2353
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355}
2356
Eli Bendersky48d358b2012-05-30 17:57:50 +03002357static int
2358treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2359{
2360 Py_VISIT(self->root);
2361 Py_VISIT(self->this);
2362 Py_VISIT(self->last);
2363 Py_VISIT(self->data);
2364 Py_VISIT(self->stack);
2365 Py_VISIT(self->element_factory);
2366 return 0;
2367}
2368
2369static int
2370treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002372 Py_CLEAR(self->end_ns_event_obj);
2373 Py_CLEAR(self->start_ns_event_obj);
2374 Py_CLEAR(self->end_event_obj);
2375 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002376 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002377 Py_CLEAR(self->stack);
2378 Py_CLEAR(self->data);
2379 Py_CLEAR(self->last);
2380 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002381 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002382 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002383 return 0;
2384}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386static void
2387treebuilder_dealloc(TreeBuilderObject *self)
2388{
2389 PyObject_GC_UnTrack(self);
2390 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002391 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392}
2393
2394/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002395/* helpers for handling of arbitrary element-like objects */
2396
2397static int
2398treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2399 PyObject **dest, _Py_Identifier *name)
2400{
2401 if (Element_CheckExact(element)) {
2402 Py_DECREF(JOIN_OBJ(*dest));
2403 *dest = JOIN_SET(data, PyList_CheckExact(data));
2404 return 0;
2405 }
2406 else {
2407 PyObject *joined = list_join(data);
2408 int r;
2409 if (joined == NULL)
2410 return -1;
2411 r = _PyObject_SetAttrId(element, name, joined);
2412 Py_DECREF(joined);
2413 return r;
2414 }
2415}
2416
2417/* These two functions steal a reference to data */
2418static int
2419treebuilder_set_element_text(PyObject *element, PyObject *data)
2420{
2421 _Py_IDENTIFIER(text);
2422 return treebuilder_set_element_text_or_tail(
2423 element, data, &((ElementObject *) element)->text, &PyId_text);
2424}
2425
2426static int
2427treebuilder_set_element_tail(PyObject *element, PyObject *data)
2428{
2429 _Py_IDENTIFIER(tail);
2430 return treebuilder_set_element_text_or_tail(
2431 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2432}
2433
2434static int
2435treebuilder_add_subelement(PyObject *element, PyObject *child)
2436{
2437 _Py_IDENTIFIER(append);
2438 if (Element_CheckExact(element)) {
2439 ElementObject *elem = (ElementObject *) element;
2440 return element_add_subelement(elem, child);
2441 }
2442 else {
2443 PyObject *res;
Victor Stinnerf5616342016-12-09 15:26:00 +01002444 res = _PyObject_CallMethodIdObjArgs(element, &PyId_append, child, NULL);
Antoine Pitrouee329312012-10-04 19:53:29 +02002445 if (res == NULL)
2446 return -1;
2447 Py_DECREF(res);
2448 return 0;
2449 }
2450}
2451
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002452LOCAL(int)
2453treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2454 PyObject *node)
2455{
2456 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002457 PyObject *res;
2458 PyObject *event = PyTuple_Pack(2, action, node);
2459 if (event == NULL)
2460 return -1;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01002461 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002462 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002463 if (res == NULL)
2464 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002465 Py_DECREF(res);
2466 }
2467 return 0;
2468}
2469
Antoine Pitrouee329312012-10-04 19:53:29 +02002470/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471/* handlers */
2472
2473LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2475 PyObject* attrib)
2476{
2477 PyObject* node;
2478 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002479 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480
2481 if (self->data) {
2482 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002483 if (treebuilder_set_element_text(self->last, self->data))
2484 return NULL;
2485 }
2486 else {
2487 if (treebuilder_set_element_tail(self->last, self->data))
2488 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 }
2490 self->data = NULL;
2491 }
2492
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002493 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002494 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002495 } else if (attrib == Py_None) {
2496 attrib = PyDict_New();
2497 if (!attrib)
2498 return NULL;
2499 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2500 Py_DECREF(attrib);
2501 }
2502 else {
2503 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002504 }
2505 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002507 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508
Antoine Pitrouee329312012-10-04 19:53:29 +02002509 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510
2511 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002512 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002513 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514 } else {
2515 if (self->root) {
2516 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002517 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518 "multiple elements on top level"
2519 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002520 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 }
2522 Py_INCREF(node);
2523 self->root = node;
2524 }
2525
2526 if (self->index < PyList_GET_SIZE(self->stack)) {
2527 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002528 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 Py_INCREF(this);
2530 } else {
2531 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002532 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 }
2534 self->index++;
2535
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002537 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002539 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002541 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2542 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543
2544 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002545
2546 error:
2547 Py_DECREF(node);
2548 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549}
2550
2551LOCAL(PyObject*)
2552treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2553{
2554 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002555 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002556 /* ignore calls to data before the first call to start */
2557 Py_RETURN_NONE;
2558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559 /* store the first item as is */
2560 Py_INCREF(data); self->data = data;
2561 } else {
2562 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002563 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2564 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002565 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566 /* expat often generates single character data sections; handle
2567 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002568 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2569 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002571 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572 } else if (PyList_CheckExact(self->data)) {
2573 if (PyList_Append(self->data, data) < 0)
2574 return NULL;
2575 } else {
2576 PyObject* list = PyList_New(2);
2577 if (!list)
2578 return NULL;
2579 PyList_SET_ITEM(list, 0, self->data);
2580 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2581 self->data = list;
2582 }
2583 }
2584
2585 Py_RETURN_NONE;
2586}
2587
2588LOCAL(PyObject*)
2589treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2590{
2591 PyObject* item;
2592
2593 if (self->data) {
2594 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002595 if (treebuilder_set_element_text(self->last, self->data))
2596 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002597 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002598 if (treebuilder_set_element_tail(self->last, self->data))
2599 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600 }
2601 self->data = NULL;
2602 }
2603
2604 if (self->index == 0) {
2605 PyErr_SetString(
2606 PyExc_IndexError,
2607 "pop from empty stack"
2608 );
2609 return NULL;
2610 }
2611
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002612 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002613 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002614 self->index--;
2615 self->this = PyList_GET_ITEM(self->stack, self->index);
2616 Py_INCREF(self->this);
2617 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002619 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2620 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621
2622 Py_INCREF(self->last);
2623 return (PyObject*) self->last;
2624}
2625
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626/* -------------------------------------------------------------------- */
2627/* methods (in alphabetical order) */
2628
Serhiy Storchakacb985562015-05-04 15:32:48 +03002629/*[clinic input]
2630_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631
Serhiy Storchakacb985562015-05-04 15:32:48 +03002632 data: object
2633 /
2634
2635[clinic start generated code]*/
2636
2637static PyObject *
2638_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2639/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2640{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641 return treebuilder_handle_data(self, data);
2642}
2643
Serhiy Storchakacb985562015-05-04 15:32:48 +03002644/*[clinic input]
2645_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646
Serhiy Storchakacb985562015-05-04 15:32:48 +03002647 tag: object
2648 /
2649
2650[clinic start generated code]*/
2651
2652static PyObject *
2653_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2654/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2655{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656 return treebuilder_handle_end(self, tag);
2657}
2658
2659LOCAL(PyObject*)
2660treebuilder_done(TreeBuilderObject* self)
2661{
2662 PyObject* res;
2663
2664 /* FIXME: check stack size? */
2665
2666 if (self->root)
2667 res = self->root;
2668 else
2669 res = Py_None;
2670
2671 Py_INCREF(res);
2672 return res;
2673}
2674
Serhiy Storchakacb985562015-05-04 15:32:48 +03002675/*[clinic input]
2676_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002677
Serhiy Storchakacb985562015-05-04 15:32:48 +03002678[clinic start generated code]*/
2679
2680static PyObject *
2681_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2682/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2683{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684 return treebuilder_done(self);
2685}
2686
Serhiy Storchakacb985562015-05-04 15:32:48 +03002687/*[clinic input]
2688_elementtree.TreeBuilder.start
2689
2690 tag: object
2691 attrs: object = None
2692 /
2693
2694[clinic start generated code]*/
2695
2696static PyObject *
2697_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2698 PyObject *attrs)
2699/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002701 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702}
2703
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704/* ==================================================================== */
2705/* the expat interface */
2706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002709
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 * NOTE(review): the pointer is assigned outside this section, presumably
 * during module initialisation -- confirm.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call an expat entry point through the cached dispatch table,
 * e.g. EXPAT(ParserFree)(parser). */
#define EXPAT(func) (expat_capi->func)

/* Allocator suite handed to ParserCreate_MM so that the expat parser
 * allocates through PyObject_Malloc/Realloc/Free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2718
/* Instance layout of the XMLParser type.  All PyObject fields are set to
 * NULL by xmlparser_new() and populated by __init__. */
typedef struct {
    PyObject_HEAD

    /* expat parser handle, created with ParserCreate_MM in __init__ */
    XML_Parser parser;

    /* object receiving the parse events (a new TreeBuilder when the
       caller did not supply one) */
    PyObject *target;
    /* dict consulted by expat_default_handler to resolve "&name;" */
    PyObject *entity;

    /* dict cache used by makeuniversal(): raw bytes name -> decoded
       universal name */
    PyObject *names;

    /* target attributes "start", "data" and "end"; NULL when the target
       has no such attribute */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    /* target attributes "comment", "pi" and "doctype"; NULL when absent */
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* target attribute "close"; NULL when absent */
    PyObject *handle_close;

} XMLParserObject;
2740
/* Forward declarations: expat_start_doctype_handler() below compares a
 * looked-up "doctype" attribute against the built-in method and calls its
 * implementation before either is defined in this file. */
static PyObject*
_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
                                    PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002746
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747/* helpers */
2748
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749LOCAL(PyObject*)
2750makeuniversal(XMLParserObject* self, const char* string)
2751{
2752 /* convert a UTF-8 tag/attribute name from the expat parser
2753 to a universal name string */
2754
Antoine Pitrouc1948842012-10-01 23:40:37 +02002755 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756 PyObject* key;
2757 PyObject* value;
2758
2759 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002760 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 if (!key)
2762 return NULL;
2763
2764 value = PyDict_GetItem(self->names, key);
2765
2766 if (value) {
2767 Py_INCREF(value);
2768 } else {
2769 /* new name. convert to universal name, and decode as
2770 necessary */
2771
2772 PyObject* tag;
2773 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002774 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775
2776 /* look for namespace separator */
2777 for (i = 0; i < size; i++)
2778 if (string[i] == '}')
2779 break;
2780 if (i != size) {
2781 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002782 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002783 if (tag == NULL) {
2784 Py_DECREF(key);
2785 return NULL;
2786 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002787 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 p[0] = '{';
2789 memcpy(p+1, string, size);
2790 size++;
2791 } else {
2792 /* plain name; use key as tag */
2793 Py_INCREF(key);
2794 tag = key;
2795 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002796
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002798 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002799 value = PyUnicode_DecodeUTF8(p, size, "strict");
2800 Py_DECREF(tag);
2801 if (!value) {
2802 Py_DECREF(key);
2803 return NULL;
2804 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805
2806 /* add to names dictionary */
2807 if (PyDict_SetItem(self->names, key, value) < 0) {
2808 Py_DECREF(key);
2809 Py_DECREF(value);
2810 return NULL;
2811 }
2812 }
2813
2814 Py_DECREF(key);
2815 return value;
2816}
2817
Eli Bendersky5b77d812012-03-16 08:20:05 +02002818/* Set the ParseError exception with the given parameters.
2819 * If message is not NULL, it's used as the error string. Otherwise, the
2820 * message string is the default for the given error_code.
2821*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002822static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002823expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2824 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002825{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002826 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002827 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002829 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002830 message ? message : EXPAT(ErrorString)(error_code),
2831 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002832 if (errmsg == NULL)
2833 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002834
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002835 error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002836 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002837 if (!error)
2838 return;
2839
Eli Bendersky5b77d812012-03-16 08:20:05 +02002840 /* Add code and position attributes */
2841 code = PyLong_FromLong((long)error_code);
2842 if (!code) {
2843 Py_DECREF(error);
2844 return;
2845 }
2846 if (PyObject_SetAttrString(error, "code", code) == -1) {
2847 Py_DECREF(error);
2848 Py_DECREF(code);
2849 return;
2850 }
2851 Py_DECREF(code);
2852
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002853 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002854 if (!position) {
2855 Py_DECREF(error);
2856 return;
2857 }
2858 if (PyObject_SetAttrString(error, "position", position) == -1) {
2859 Py_DECREF(error);
2860 Py_DECREF(position);
2861 return;
2862 }
2863 Py_DECREF(position);
2864
Eli Bendersky532d03e2013-08-10 08:00:39 -07002865 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866 Py_DECREF(error);
2867}
2868
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869/* -------------------------------------------------------------------- */
2870/* handlers */
2871
2872static void
2873expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2874 int data_len)
2875{
2876 PyObject* key;
2877 PyObject* value;
2878 PyObject* res;
2879
2880 if (data_len < 2 || data_in[0] != '&')
2881 return;
2882
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002883 if (PyErr_Occurred())
2884 return;
2885
Neal Norwitz0269b912007-08-08 06:56:02 +00002886 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887 if (!key)
2888 return;
2889
2890 value = PyDict_GetItem(self->entity, key);
2891
2892 if (value) {
2893 if (TreeBuilder_CheckExact(self->target))
2894 res = treebuilder_handle_data(
2895 (TreeBuilderObject*) self->target, value
2896 );
2897 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01002898 res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899 else
2900 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002902 } else if (!PyErr_Occurred()) {
2903 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002904 char message[128] = "undefined entity ";
2905 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002906 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002907 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002909 EXPAT(GetErrorColumnNumber)(self->parser),
2910 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911 );
2912 }
2913
2914 Py_DECREF(key);
2915}
2916
2917static void
2918expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2919 const XML_Char **attrib_in)
2920{
2921 PyObject* res;
2922 PyObject* tag;
2923 PyObject* attrib;
2924 int ok;
2925
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002926 if (PyErr_Occurred())
2927 return;
2928
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929 /* tag name */
2930 tag = makeuniversal(self, tag_in);
2931 if (!tag)
2932 return; /* parser will look for errors */
2933
2934 /* attributes */
2935 if (attrib_in[0]) {
2936 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002937 if (!attrib) {
2938 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002940 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941 while (attrib_in[0] && attrib_in[1]) {
2942 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002943 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 if (!key || !value) {
2945 Py_XDECREF(value);
2946 Py_XDECREF(key);
2947 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002948 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 return;
2950 }
2951 ok = PyDict_SetItem(attrib, key, value);
2952 Py_DECREF(value);
2953 Py_DECREF(key);
2954 if (ok < 0) {
2955 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002956 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002957 return;
2958 }
2959 attrib_in += 2;
2960 }
2961 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002962 Py_INCREF(Py_None);
2963 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002964 }
2965
2966 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002967 /* shortcut */
2968 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2969 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002970 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002971 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002972 if (attrib == Py_None) {
2973 Py_DECREF(attrib);
2974 attrib = PyDict_New();
2975 if (!attrib) {
2976 Py_DECREF(tag);
2977 return;
2978 }
2979 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002980 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002981 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002982 res = NULL;
2983
2984 Py_DECREF(tag);
2985 Py_DECREF(attrib);
2986
2987 Py_XDECREF(res);
2988}
2989
2990static void
2991expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2992 int data_len)
2993{
2994 PyObject* data;
2995 PyObject* res;
2996
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002997 if (PyErr_Occurred())
2998 return;
2999
Neal Norwitz0269b912007-08-08 06:56:02 +00003000 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003001 if (!data)
3002 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003
3004 if (TreeBuilder_CheckExact(self->target))
3005 /* shortcut */
3006 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3007 else if (self->handle_data)
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003008 res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 else
3010 res = NULL;
3011
3012 Py_DECREF(data);
3013
3014 Py_XDECREF(res);
3015}
3016
3017static void
3018expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3019{
3020 PyObject* tag;
3021 PyObject* res = NULL;
3022
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003023 if (PyErr_Occurred())
3024 return;
3025
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026 if (TreeBuilder_CheckExact(self->target))
3027 /* shortcut */
3028 /* the standard tree builder doesn't look at the end tag */
3029 res = treebuilder_handle_end(
3030 (TreeBuilderObject*) self->target, Py_None
3031 );
3032 else if (self->handle_end) {
3033 tag = makeuniversal(self, tag_in);
3034 if (tag) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003035 res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003036 Py_DECREF(tag);
3037 }
3038 }
3039
3040 Py_XDECREF(res);
3041}
3042
3043static void
3044expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3045 const XML_Char *uri)
3046{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003047 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3048 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003049
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003050 if (PyErr_Occurred())
3051 return;
3052
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003053 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003054 return;
3055
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003056 if (!uri)
3057 uri = "";
3058 if (!prefix)
3059 prefix = "";
3060
3061 parcel = Py_BuildValue("ss", prefix, uri);
3062 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003063 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003064 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3065 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003066}
3067
3068static void
3069expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3070{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003071 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3072
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003073 if (PyErr_Occurred())
3074 return;
3075
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003076 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003077 return;
3078
3079 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003080}
3081
3082static void
3083expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3084{
3085 PyObject* comment;
3086 PyObject* res;
3087
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003088 if (PyErr_Occurred())
3089 return;
3090
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003091 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003092 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093 if (comment) {
Victor Stinner7bfb42d2016-12-05 17:04:32 +01003094 res = PyObject_CallFunctionObjArgs(self->handle_comment,
3095 comment, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 Py_XDECREF(res);
3097 Py_DECREF(comment);
3098 }
3099 }
3100}
3101
Eli Bendersky45839902013-01-13 05:14:47 -08003102static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003103expat_start_doctype_handler(XMLParserObject *self,
3104 const XML_Char *doctype_name,
3105 const XML_Char *sysid,
3106 const XML_Char *pubid,
3107 int has_internal_subset)
3108{
3109 PyObject *self_pyobj = (PyObject *)self;
3110 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3111 PyObject *parser_doctype = NULL;
3112 PyObject *res = NULL;
3113
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003114 if (PyErr_Occurred())
3115 return;
3116
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003117 doctype_name_obj = makeuniversal(self, doctype_name);
3118 if (!doctype_name_obj)
3119 return;
3120
3121 if (sysid) {
3122 sysid_obj = makeuniversal(self, sysid);
3123 if (!sysid_obj) {
3124 Py_DECREF(doctype_name_obj);
3125 return;
3126 }
3127 } else {
3128 Py_INCREF(Py_None);
3129 sysid_obj = Py_None;
3130 }
3131
3132 if (pubid) {
3133 pubid_obj = makeuniversal(self, pubid);
3134 if (!pubid_obj) {
3135 Py_DECREF(doctype_name_obj);
3136 Py_DECREF(sysid_obj);
3137 return;
3138 }
3139 } else {
3140 Py_INCREF(Py_None);
3141 pubid_obj = Py_None;
3142 }
3143
3144 /* If the target has a handler for doctype, call it. */
3145 if (self->handle_doctype) {
3146 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3147 doctype_name_obj, pubid_obj, sysid_obj);
3148 Py_CLEAR(res);
3149 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003150 else {
3151 /* Now see if the parser itself has a doctype method. If yes and it's
3152 * a custom method, call it but warn about deprecation. If it's only
3153 * the vanilla XMLParser method, do nothing.
3154 */
3155 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3156 if (parser_doctype &&
3157 !(PyCFunction_Check(parser_doctype) &&
3158 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3159 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003160 (PyCFunction) _elementtree_XMLParser_doctype)) {
3161 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3162 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003163 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003164 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003165 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003166 res = PyObject_CallFunction(parser_doctype, "OOO",
3167 doctype_name_obj, pubid_obj, sysid_obj);
3168 Py_CLEAR(res);
3169 }
3170 }
3171
3172clear:
3173 Py_XDECREF(parser_doctype);
3174 Py_DECREF(doctype_name_obj);
3175 Py_DECREF(pubid_obj);
3176 Py_DECREF(sysid_obj);
3177}
3178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179static void
3180expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3181 const XML_Char* data_in)
3182{
3183 PyObject* target;
3184 PyObject* data;
3185 PyObject* res;
3186
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003187 if (PyErr_Occurred())
3188 return;
3189
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003191 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3192 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003193 if (target && data) {
3194 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3195 Py_XDECREF(res);
3196 Py_DECREF(data);
3197 Py_DECREF(target);
3198 } else {
3199 Py_XDECREF(data);
3200 Py_XDECREF(target);
3201 }
3202 }
3203}
3204
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003206
Eli Bendersky52467b12012-06-01 07:13:08 +03003207static PyObject *
3208xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003209{
Eli Bendersky52467b12012-06-01 07:13:08 +03003210 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3211 if (self) {
3212 self->parser = NULL;
3213 self->target = self->entity = self->names = NULL;
3214 self->handle_start = self->handle_data = self->handle_end = NULL;
3215 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003216 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003218 return (PyObject *)self;
3219}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220
Serhiy Storchakacb985562015-05-04 15:32:48 +03003221/*[clinic input]
3222_elementtree.XMLParser.__init__
3223
3224 html: object = NULL
3225 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003226 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003227
3228[clinic start generated code]*/
3229
Eli Bendersky52467b12012-06-01 07:13:08 +03003230static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003231_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3232 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003233/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003234{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003235 self->entity = PyDict_New();
3236 if (!self->entity)
3237 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238
Serhiy Storchakacb985562015-05-04 15:32:48 +03003239 self->names = PyDict_New();
3240 if (!self->names) {
3241 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003242 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003244
Serhiy Storchakacb985562015-05-04 15:32:48 +03003245 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3246 if (!self->parser) {
3247 Py_CLEAR(self->entity);
3248 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003250 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 }
3252
Eli Bendersky52467b12012-06-01 07:13:08 +03003253 if (target) {
3254 Py_INCREF(target);
3255 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003256 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003258 Py_CLEAR(self->entity);
3259 Py_CLEAR(self->names);
3260 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003261 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003263 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003264 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266 self->handle_start = PyObject_GetAttrString(target, "start");
3267 self->handle_data = PyObject_GetAttrString(target, "data");
3268 self->handle_end = PyObject_GetAttrString(target, "end");
3269 self->handle_comment = PyObject_GetAttrString(target, "comment");
3270 self->handle_pi = PyObject_GetAttrString(target, "pi");
3271 self->handle_close = PyObject_GetAttrString(target, "close");
3272 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273
3274 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003275
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003277 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 (XML_StartElementHandler) expat_start_handler,
3281 (XML_EndElementHandler) expat_end_handler
3282 );
3283 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003284 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285 (XML_DefaultHandler) expat_default_handler
3286 );
3287 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 (XML_CharacterDataHandler) expat_data_handler
3290 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 (XML_CommentHandler) expat_comment_handler
3295 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003298 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 (XML_ProcessingInstructionHandler) expat_pi_handler
3300 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003301 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003303 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3304 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003306 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003307 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003308 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309
Eli Bendersky52467b12012-06-01 07:13:08 +03003310 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311}
3312
Eli Bendersky52467b12012-06-01 07:13:08 +03003313static int
3314xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3315{
3316 Py_VISIT(self->handle_close);
3317 Py_VISIT(self->handle_pi);
3318 Py_VISIT(self->handle_comment);
3319 Py_VISIT(self->handle_end);
3320 Py_VISIT(self->handle_data);
3321 Py_VISIT(self->handle_start);
3322
3323 Py_VISIT(self->target);
3324 Py_VISIT(self->entity);
3325 Py_VISIT(self->names);
3326
3327 return 0;
3328}
3329
3330static int
3331xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332{
3333 EXPAT(ParserFree)(self->parser);
3334
Antoine Pitrouc1948842012-10-01 23:40:37 +02003335 Py_CLEAR(self->handle_close);
3336 Py_CLEAR(self->handle_pi);
3337 Py_CLEAR(self->handle_comment);
3338 Py_CLEAR(self->handle_end);
3339 Py_CLEAR(self->handle_data);
3340 Py_CLEAR(self->handle_start);
3341 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342
Antoine Pitrouc1948842012-10-01 23:40:37 +02003343 Py_CLEAR(self->target);
3344 Py_CLEAR(self->entity);
3345 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346
Eli Bendersky52467b12012-06-01 07:13:08 +03003347 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348}
3349
Eli Bendersky52467b12012-06-01 07:13:08 +03003350static void
3351xmlparser_dealloc(XMLParserObject* self)
3352{
3353 PyObject_GC_UnTrack(self);
3354 xmlparser_gc_clear(self);
3355 Py_TYPE(self)->tp_free((PyObject *)self);
3356}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357
3358LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003359expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360{
3361 int ok;
3362
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003363 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3365
3366 if (PyErr_Occurred())
3367 return NULL;
3368
3369 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003370 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003371 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003373 EXPAT(GetErrorColumnNumber)(self->parser),
3374 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375 );
3376 return NULL;
3377 }
3378
3379 Py_RETURN_NONE;
3380}
3381
Serhiy Storchakacb985562015-05-04 15:32:48 +03003382/*[clinic input]
3383_elementtree.XMLParser.close
3384
3385[clinic start generated code]*/
3386
3387static PyObject *
3388_elementtree_XMLParser_close_impl(XMLParserObject *self)
3389/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390{
3391 /* end feeding data to parser */
3392
3393 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003395 if (!res)
3396 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003398 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 Py_DECREF(res);
3400 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003401 }
3402 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003403 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003404 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003405 }
3406 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003407 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003408 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409}
3410
Serhiy Storchakacb985562015-05-04 15:32:48 +03003411/*[clinic input]
3412_elementtree.XMLParser.feed
3413
3414 data: object
3415 /
3416
3417[clinic start generated code]*/
3418
3419static PyObject *
3420_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3421/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422{
3423 /* feed data to parser */
3424
Serhiy Storchakacb985562015-05-04 15:32:48 +03003425 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003426 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003427 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3428 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003429 return NULL;
3430 if (data_len > INT_MAX) {
3431 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3432 return NULL;
3433 }
3434 /* Explicitly set UTF-8 encoding. Return code ignored. */
3435 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003436 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003437 }
3438 else {
3439 Py_buffer view;
3440 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003441 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003442 return NULL;
3443 if (view.len > INT_MAX) {
3444 PyBuffer_Release(&view);
3445 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3446 return NULL;
3447 }
3448 res = expat_parse(self, view.buf, (int)view.len, 0);
3449 PyBuffer_Release(&view);
3450 return res;
3451 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003452}
3453
Serhiy Storchakacb985562015-05-04 15:32:48 +03003454/*[clinic input]
3455_elementtree.XMLParser._parse_whole
3456
3457 file: object
3458 /
3459
3460[clinic start generated code]*/
3461
3462static PyObject *
3463_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3464/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465{
Eli Benderskya3699232013-05-19 18:47:23 -07003466 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003467 PyObject* reader;
3468 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003469 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003470 PyObject* res;
3471
Serhiy Storchakacb985562015-05-04 15:32:48 +03003472 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 if (!reader)
3474 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003475
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 /* read from open file object */
3477 for (;;) {
3478
3479 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3480
3481 if (!buffer) {
3482 /* read failed (e.g. due to KeyboardInterrupt) */
3483 Py_DECREF(reader);
3484 return NULL;
3485 }
3486
Eli Benderskyf996e772012-03-16 05:53:30 +02003487 if (PyUnicode_CheckExact(buffer)) {
3488 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003489 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003490 Py_DECREF(buffer);
3491 break;
3492 }
3493 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003494 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003495 if (!temp) {
3496 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003497 Py_DECREF(reader);
3498 return NULL;
3499 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003500 buffer = temp;
3501 }
3502 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503 Py_DECREF(buffer);
3504 break;
3505 }
3506
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003507 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3508 Py_DECREF(buffer);
3509 Py_DECREF(reader);
3510 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3511 return NULL;
3512 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003513 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003514 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515 );
3516
3517 Py_DECREF(buffer);
3518
3519 if (!res) {
3520 Py_DECREF(reader);
3521 return NULL;
3522 }
3523 Py_DECREF(res);
3524
3525 }
3526
3527 Py_DECREF(reader);
3528
3529 res = expat_parse(self, "", 0, 1);
3530
3531 if (res && TreeBuilder_CheckExact(self->target)) {
3532 Py_DECREF(res);
3533 return treebuilder_done((TreeBuilderObject*) self->target);
3534 }
3535
3536 return res;
3537}
3538
Serhiy Storchakacb985562015-05-04 15:32:48 +03003539/*[clinic input]
3540_elementtree.XMLParser.doctype
3541
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003542 name: object
3543 pubid: object
3544 system: object
3545 /
3546
Serhiy Storchakacb985562015-05-04 15:32:48 +03003547[clinic start generated code]*/
3548
3549static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003550_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3551 PyObject *pubid, PyObject *system)
3552/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003553{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003554 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3555 "This method of XMLParser is deprecated. Define"
3556 " doctype() method on the TreeBuilder target.",
3557 1) < 0) {
3558 return NULL;
3559 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003560 Py_RETURN_NONE;
3561}
3562
Serhiy Storchakacb985562015-05-04 15:32:48 +03003563/*[clinic input]
3564_elementtree.XMLParser._setevents
3565
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003566 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003567 events_to_report: object = None
3568 /
3569
3570[clinic start generated code]*/
3571
3572static PyObject *
3573_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3574 PyObject *events_queue,
3575 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003576/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577{
3578 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003579 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003580 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003581 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582
3583 if (!TreeBuilder_CheckExact(self->target)) {
3584 PyErr_SetString(
3585 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003586 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587 "targets"
3588 );
3589 return NULL;
3590 }
3591
3592 target = (TreeBuilderObject*) self->target;
3593
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003594 events_append = PyObject_GetAttrString(events_queue, "append");
3595 if (events_append == NULL)
3596 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003597 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003598
3599 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003600 Py_CLEAR(target->start_event_obj);
3601 Py_CLEAR(target->end_event_obj);
3602 Py_CLEAR(target->start_ns_event_obj);
3603 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003605 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003607 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608 Py_RETURN_NONE;
3609 }
3610
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003611 if (!(events_seq = PySequence_Fast(events_to_report,
3612 "events must be a sequence"))) {
3613 return NULL;
3614 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003615
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003616 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003617 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02003618 const char *event_name = NULL;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003619 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003620 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003621 } else if (PyBytes_Check(event_name_obj)) {
3622 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003623 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003624 if (event_name == NULL) {
3625 Py_DECREF(events_seq);
3626 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3627 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003628 }
3629
3630 Py_INCREF(event_name_obj);
3631 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003632 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003633 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003634 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003635 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003636 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637 EXPAT(SetNamespaceDeclHandler)(
3638 self->parser,
3639 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3640 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3641 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003642 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003643 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 EXPAT(SetNamespaceDeclHandler)(
3645 self->parser,
3646 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3647 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3648 );
3649 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003650 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003651 Py_DECREF(events_seq);
3652 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653 return NULL;
3654 }
3655 }
3656
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003657 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659}
3660
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003661static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003662xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003663{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003664 if (PyUnicode_Check(nameobj)) {
3665 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003666 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003667 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003668 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003669 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003670 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003671 return PyUnicode_FromFormat(
3672 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003674 }
3675 else
3676 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677
Alexander Belopolskye239d232010-12-08 23:31:48 +00003678 Py_INCREF(res);
3679 return res;
3680 }
3681 generic:
3682 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683}
3684
Serhiy Storchakacb985562015-05-04 15:32:48 +03003685#include "clinic/_elementtree.c.h"
3686
3687static PyMethodDef element_methods[] = {
3688
3689 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3690
3691 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3692 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3693
3694 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3695 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3696 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3697
3698 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3699 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3700 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3701 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3702
3703 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3704 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3705 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3706
Victor Stinner84d8baa2016-09-29 22:12:35 +02003707 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_FASTCALL, _elementtree_Element_iter__doc__},
Serhiy Storchakacb985562015-05-04 15:32:48 +03003708 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3709
3710 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3711 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3712
3713 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3714
3715 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3716 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3717 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3718 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3719 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3720
3721 {NULL, NULL}
3722};
3723
3724static PyMappingMethods element_as_mapping = {
3725 (lenfunc) element_length,
3726 (binaryfunc) element_subscr,
3727 (objobjargproc) element_ass_subscr,
3728};
3729
Serhiy Storchakadde08152015-11-25 15:28:13 +02003730static PyGetSetDef element_getsetlist[] = {
3731 {"tag",
3732 (getter)element_tag_getter,
3733 (setter)element_tag_setter,
3734 "A string identifying what kind of data this element represents"},
3735 {"text",
3736 (getter)element_text_getter,
3737 (setter)element_text_setter,
3738 "A string of text directly after the start tag, or None"},
3739 {"tail",
3740 (getter)element_tail_getter,
3741 (setter)element_tail_setter,
3742 "A string of text directly after the end tag, or None"},
3743 {"attrib",
3744 (getter)element_attrib_getter,
3745 (setter)element_attrib_setter,
3746 "A dictionary containing the element's attributes"},
3747 {NULL},
3748};
3749
Serhiy Storchakacb985562015-05-04 15:32:48 +03003750static PyTypeObject Element_Type = {
3751 PyVarObject_HEAD_INIT(NULL, 0)
3752 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3753 /* methods */
3754 (destructor)element_dealloc, /* tp_dealloc */
3755 0, /* tp_print */
3756 0, /* tp_getattr */
3757 0, /* tp_setattr */
3758 0, /* tp_reserved */
3759 (reprfunc)element_repr, /* tp_repr */
3760 0, /* tp_as_number */
3761 &element_as_sequence, /* tp_as_sequence */
3762 &element_as_mapping, /* tp_as_mapping */
3763 0, /* tp_hash */
3764 0, /* tp_call */
3765 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003766 PyObject_GenericGetAttr, /* tp_getattro */
3767 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003768 0, /* tp_as_buffer */
3769 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3770 /* tp_flags */
3771 0, /* tp_doc */
3772 (traverseproc)element_gc_traverse, /* tp_traverse */
3773 (inquiry)element_gc_clear, /* tp_clear */
3774 0, /* tp_richcompare */
3775 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3776 0, /* tp_iter */
3777 0, /* tp_iternext */
3778 element_methods, /* tp_methods */
3779 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003780 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003781 0, /* tp_base */
3782 0, /* tp_dict */
3783 0, /* tp_descr_get */
3784 0, /* tp_descr_set */
3785 0, /* tp_dictoffset */
3786 (initproc)element_init, /* tp_init */
3787 PyType_GenericAlloc, /* tp_alloc */
3788 element_new, /* tp_new */
3789 0, /* tp_free */
3790};
3791
/* Method table for TreeBuilder (installed as TreeBuilder_Type.tp_methods).
   Each _ELEMENTTREE_TREEBUILDER_*_METHODDEF macro expands to one table
   entry; the macros come from clinic/_elementtree.c.h. */
static PyMethodDef treebuilder_methods[] = {
    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3799
3800static PyTypeObject TreeBuilder_Type = {
3801 PyVarObject_HEAD_INIT(NULL, 0)
3802 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3803 /* methods */
3804 (destructor)treebuilder_dealloc, /* tp_dealloc */
3805 0, /* tp_print */
3806 0, /* tp_getattr */
3807 0, /* tp_setattr */
3808 0, /* tp_reserved */
3809 0, /* tp_repr */
3810 0, /* tp_as_number */
3811 0, /* tp_as_sequence */
3812 0, /* tp_as_mapping */
3813 0, /* tp_hash */
3814 0, /* tp_call */
3815 0, /* tp_str */
3816 0, /* tp_getattro */
3817 0, /* tp_setattro */
3818 0, /* tp_as_buffer */
3819 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3820 /* tp_flags */
3821 0, /* tp_doc */
3822 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3823 (inquiry)treebuilder_gc_clear, /* tp_clear */
3824 0, /* tp_richcompare */
3825 0, /* tp_weaklistoffset */
3826 0, /* tp_iter */
3827 0, /* tp_iternext */
3828 treebuilder_methods, /* tp_methods */
3829 0, /* tp_members */
3830 0, /* tp_getset */
3831 0, /* tp_base */
3832 0, /* tp_dict */
3833 0, /* tp_descr_get */
3834 0, /* tp_descr_set */
3835 0, /* tp_dictoffset */
3836 _elementtree_TreeBuilder___init__, /* tp_init */
3837 PyType_GenericAlloc, /* tp_alloc */
3838 treebuilder_new, /* tp_new */
3839 0, /* tp_free */
3840};
3841
/* Method table for XMLParser (installed as XMLParser_Type.tp_methods).
   Each _ELEMENTTREE_XMLPARSER_*_METHODDEF macro expands to one table
   entry; the macros come from clinic/_elementtree.c.h. */
static PyMethodDef xmlparser_methods[] = {
    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
    _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3850
Neal Norwitz227b5332006-03-22 09:28:35 +00003851static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003852 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003853 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003854 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003855 (destructor)xmlparser_dealloc, /* tp_dealloc */
3856 0, /* tp_print */
3857 0, /* tp_getattr */
3858 0, /* tp_setattr */
3859 0, /* tp_reserved */
3860 0, /* tp_repr */
3861 0, /* tp_as_number */
3862 0, /* tp_as_sequence */
3863 0, /* tp_as_mapping */
3864 0, /* tp_hash */
3865 0, /* tp_call */
3866 0, /* tp_str */
3867 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3868 0, /* tp_setattro */
3869 0, /* tp_as_buffer */
3870 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3871 /* tp_flags */
3872 0, /* tp_doc */
3873 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3874 (inquiry)xmlparser_gc_clear, /* tp_clear */
3875 0, /* tp_richcompare */
3876 0, /* tp_weaklistoffset */
3877 0, /* tp_iter */
3878 0, /* tp_iternext */
3879 xmlparser_methods, /* tp_methods */
3880 0, /* tp_members */
3881 0, /* tp_getset */
3882 0, /* tp_base */
3883 0, /* tp_dict */
3884 0, /* tp_descr_get */
3885 0, /* tp_descr_set */
3886 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003887 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003888 PyType_GenericAlloc, /* tp_alloc */
3889 xmlparser_new, /* tp_new */
3890 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003891};
3892
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003893/* ==================================================================== */
3894/* python module interface */
3895
3896static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003897 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003898 {NULL, NULL}
3899};
3900
Martin v. Löwis1a214512008-06-11 05:26:20 +00003901
Eli Bendersky532d03e2013-08-10 08:00:39 -07003902static struct PyModuleDef elementtreemodule = {
3903 PyModuleDef_HEAD_INIT,
3904 "_elementtree",
3905 NULL,
3906 sizeof(elementtreestate),
3907 _functions,
3908 NULL,
3909 elementtree_traverse,
3910 elementtree_clear,
3911 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003912};
3913
Neal Norwitzf6657e62006-12-28 04:47:50 +00003914PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003915PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003916{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003917 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003918 elementtreestate *st;
3919
3920 m = PyState_FindModule(&elementtreemodule);
3921 if (m) {
3922 Py_INCREF(m);
3923 return m;
3924 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003925
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003926 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003927 if (PyType_Ready(&ElementIter_Type) < 0)
3928 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003929 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003930 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003931 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003932 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003933 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003934 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003935
Eli Bendersky532d03e2013-08-10 08:00:39 -07003936 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003937 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003938 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003939 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003940
Eli Bendersky828efde2012-04-05 05:40:58 +03003941 if (!(temp = PyImport_ImportModule("copy")))
3942 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003943 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003944 Py_XDECREF(temp);
3945
Eli Bendersky532d03e2013-08-10 08:00:39 -07003946 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003947 return NULL;
3948
Eli Bendersky20d41742012-06-01 09:48:37 +03003949 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003950 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3951 if (expat_capi) {
3952 /* check that it's usable */
3953 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003954 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003955 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3956 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003957 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003958 PyErr_SetString(PyExc_ImportError,
3959 "pyexpat version is incompatible");
3960 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003961 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003962 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003963 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003964 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003965
Eli Bendersky532d03e2013-08-10 08:00:39 -07003966 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003967 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003968 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003969 Py_INCREF(st->parseerror_obj);
3970 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003971
Eli Bendersky092af1f2012-03-04 07:14:03 +02003972 Py_INCREF((PyObject *)&Element_Type);
3973 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3974
Eli Bendersky58d548d2012-05-29 15:45:16 +03003975 Py_INCREF((PyObject *)&TreeBuilder_Type);
3976 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3977
Eli Bendersky52467b12012-06-01 07:13:08 +03003978 Py_INCREF((PyObject *)&XMLParser_Type);
3979 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003980
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003981 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003982}