blob: bef702ebe69c93376e766082f87987594dc075a8 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
/* -------------------------------------------------------------------- */
/* configuration */

/* Number of child pointers stored inline in an element's extra block,
   i.e. how many children an element can hold before a separate buffer
   has to be allocated. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */

/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the "#if 0" to "#if 1" to print a
   running byte count on every ALLOC/RELEASE; otherwise both macros expand
   to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) declares a file-local function returning
   'type'; MSVC additionally gets inline + fastcall. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Macros that keep a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_GET(p)        -> the flag bit (0 or 1)
   JOIN_SET(p, flag)  -> p with its flag bit cleared, then OR-ed with flag
   JOIN_OBJ(p)        -> p with the flag bit masked off, as PyObject* */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +0300134 /* join list elements */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000143 return result;
144}
145
Eli Bendersky48d358b2012-05-30 17:57:50 +0300146/* Is the given object an empty dictionary?
147*/
148static int
149is_empty_dict(PyObject *obj)
150{
151 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
152}
153
154
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000155/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200156/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157
158typedef struct {
159
160 /* attributes (a dictionary object), or None if no attributes */
161 PyObject* attrib;
162
163 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200164 Py_ssize_t length; /* actual number of items */
165 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166
167 /* this either points to _children or to a malloced buffer */
168 PyObject* *children;
169
170 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100171
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000172} ElementObjectExtra;
173
174typedef struct {
175 PyObject_HEAD
176
177 /* element tag (a string). */
178 PyObject* tag;
179
180 /* text before first child. note that this is a tagged pointer;
181 use JOIN_OBJ to get the object pointer. the join flag is used
182 to distinguish lists created by the tree builder from lists
183 assigned to the attribute by application code; the former
184 should be joined before being returned to the user, the latter
185 should be left intact. */
186 PyObject* text;
187
188 /* text after this element, in parent. note that this is a tagged
189 pointer; use JOIN_OBJ to get the object pointer. */
190 PyObject* tail;
191
192 ElementObjectExtra* extra;
193
Eli Benderskyebf37a22012-04-03 22:02:37 +0300194 PyObject *weakreflist; /* For tp_weaklistoffset */
195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObject;
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198
/* True iff 'op' is an instance of exactly Element_Type (subclasses do
   not match). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
201/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200202/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000203
204LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200205create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000206{
207 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200208 if (!self->extra) {
209 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000210 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200211 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212
213 if (!attrib)
214 attrib = Py_None;
215
216 Py_INCREF(attrib);
217 self->extra->attrib = attrib;
218
219 self->extra->length = 0;
220 self->extra->allocated = STATIC_CHILDREN;
221 self->extra->children = self->extra->_children;
222
223 return 0;
224}
225
226LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200227dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000228{
Eli Bendersky08b85292012-04-04 15:55:07 +0300229 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200230 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300231
Eli Benderskyebf37a22012-04-03 22:02:37 +0300232 if (!self->extra)
233 return;
234
235 /* Avoid DECREFs calling into this code again (cycles, etc.)
236 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300237 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300238 self->extra = NULL;
239
240 Py_DECREF(myextra->attrib);
241
Eli Benderskyebf37a22012-04-03 22:02:37 +0300242 for (i = 0; i < myextra->length; i++)
243 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000244
Eli Benderskyebf37a22012-04-03 22:02:37 +0300245 if (myextra->children != myextra->_children)
246 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000247
Eli Benderskyebf37a22012-04-03 22:02:37 +0300248 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249}
250
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251/* Convenience internal function to create new Element objects with the given
252 * tag and attributes.
253*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200255create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256{
257 ElementObject* self;
258
Eli Bendersky0192ba32012-03-30 16:38:33 +0300259 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000260 if (self == NULL)
261 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 self->extra = NULL;
263
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 Py_INCREF(tag);
265 self->tag = tag;
266
267 Py_INCREF(Py_None);
268 self->text = Py_None;
269
270 Py_INCREF(Py_None);
271 self->tail = Py_None;
272
Eli Benderskyebf37a22012-04-03 22:02:37 +0300273 self->weakreflist = NULL;
274
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200275 ALLOC(sizeof(ElementObject), "create element");
276 PyObject_GC_Track(self);
277
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200278 if (attrib != Py_None && !is_empty_dict(attrib)) {
279 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200280 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200281 return NULL;
282 }
283 }
284
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000285 return (PyObject*) self;
286}
287
Eli Bendersky092af1f2012-03-04 07:14:03 +0200288static PyObject *
289element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
290{
291 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
292 if (e != NULL) {
293 Py_INCREF(Py_None);
294 e->tag = Py_None;
295
296 Py_INCREF(Py_None);
297 e->text = Py_None;
298
299 Py_INCREF(Py_None);
300 e->tail = Py_None;
301
302 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300303 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200304 }
305 return (PyObject *)e;
306}
307
Eli Bendersky737b1732012-05-29 06:02:56 +0300308/* Helper function for extracting the attrib dictionary from a keywords dict.
309 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800310 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300311 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700312 *
313 * Return a dictionary with the content of kwds merged into the content of
314 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300315 */
316static PyObject*
317get_attrib_from_keywords(PyObject *kwds)
318{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700319 PyObject *attrib_str = PyUnicode_FromString("attrib");
320 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300321
322 if (attrib) {
323 /* If attrib was found in kwds, copy its value and remove it from
324 * kwds
325 */
326 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700327 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300328 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
329 Py_TYPE(attrib)->tp_name);
330 return NULL;
331 }
332 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700333 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300334 } else {
335 attrib = PyDict_New();
336 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700337
338 Py_DECREF(attrib_str);
339
340 /* attrib can be NULL if PyDict_New failed */
341 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200342 if (PyDict_Update(attrib, kwds) < 0)
343 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300344 return attrib;
345}
346
Serhiy Storchakacb985562015-05-04 15:32:48 +0300347/*[clinic input]
348module _elementtree
349class _elementtree.Element "ElementObject *" "&Element_Type"
350class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
351class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
352[clinic start generated code]*/
353/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
354
Eli Bendersky092af1f2012-03-04 07:14:03 +0200355static int
356element_init(PyObject *self, PyObject *args, PyObject *kwds)
357{
358 PyObject *tag;
359 PyObject *tmp;
360 PyObject *attrib = NULL;
361 ElementObject *self_elem;
362
363 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
364 return -1;
365
Eli Bendersky737b1732012-05-29 06:02:56 +0300366 if (attrib) {
367 /* attrib passed as positional arg */
368 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200369 if (!attrib)
370 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300371 if (kwds) {
372 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200373 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300374 return -1;
375 }
376 }
377 } else if (kwds) {
378 /* have keywords args */
379 attrib = get_attrib_from_keywords(kwds);
380 if (!attrib)
381 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200382 }
383
384 self_elem = (ElementObject *)self;
385
Antoine Pitrouc1948842012-10-01 23:40:37 +0200386 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200387 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 return -1;
390 }
391 }
392
Eli Bendersky48d358b2012-05-30 17:57:50 +0300393 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200394 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395
396 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300398 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399
400 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_DECREF(JOIN_OBJ(tmp));
404
405 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200406 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200407 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_DECREF(JOIN_OBJ(tmp));
409
410 return 0;
411}
412
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000413LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200414element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417 PyObject* *children;
418
419 /* make sure self->children can hold the given number of extra
420 elements. set an exception and return -1 if allocation failed */
421
Victor Stinner5f0af232013-07-11 23:01:36 +0200422 if (!self->extra) {
423 if (create_extra(self, NULL) < 0)
424 return -1;
425 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000426
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200427 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
429 if (size > self->extra->allocated) {
430 /* use Python 2.4's list growth strategy */
431 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000432 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100433 * which needs at least 4 bytes.
434 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000435 * be safe.
436 */
437 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200438 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
439 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000440 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000441 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100442 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 * false alarm always assume at least one child to be safe.
444 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 children = PyObject_Realloc(self->extra->children,
446 size * sizeof(PyObject*));
447 if (!children)
448 goto nomemory;
449 } else {
450 children = PyObject_Malloc(size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 /* copy existing children from static area to malloc buffer */
454 memcpy(children, self->extra->children,
455 self->extra->length * sizeof(PyObject*));
456 }
457 self->extra->children = children;
458 self->extra->allocated = size;
459 }
460
461 return 0;
462
463 nomemory:
464 PyErr_NoMemory();
465 return -1;
466}
467
468LOCAL(int)
469element_add_subelement(ElementObject* self, PyObject* element)
470{
471 /* add a child element to a parent */
472
473 if (element_resize(self, 1) < 0)
474 return -1;
475
476 Py_INCREF(element);
477 self->extra->children[self->extra->length] = element;
478
479 self->extra->length++;
480
481 return 0;
482}
483
484LOCAL(PyObject*)
485element_get_attrib(ElementObject* self)
486{
487 /* return borrowed reference to attrib dictionary */
488 /* note: this function assumes that the extra section exists */
489
490 PyObject* res = self->extra->attrib;
491
492 if (res == Py_None) {
493 /* create missing dictionary */
494 res = PyDict_New();
495 if (!res)
496 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200497 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000498 self->extra->attrib = res;
499 }
500
501 return res;
502}
503
504LOCAL(PyObject*)
505element_get_text(ElementObject* self)
506{
507 /* return borrowed reference to text attribute */
508
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +0300509 PyObject *res = self->text;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000510
511 if (JOIN_GET(res)) {
512 res = JOIN_OBJ(res);
513 if (PyList_CheckExact(res)) {
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +0300514 PyObject *tmp = list_join(res);
515 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000516 return NULL;
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +0300517 self->text = tmp;
518 Py_DECREF(res);
519 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +0300531 PyObject *res = self->tail;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +0300536 PyObject *tmp = list_join(res);
537 if (!tmp)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000538 return NULL;
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +0300539 self->tail = tmp;
540 Py_DECREF(res);
541 res = tmp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
613static int
614element_gc_clear(ElementObject *self)
615{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700617 _clear_joined_ptr(&self->text);
618 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619
620 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 return 0;
625}
626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627static void
628element_dealloc(ElementObject* self)
629{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200631 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300632
633 if (self->weakreflist != NULL)
634 PyObject_ClearWeakRefs((PyObject *) self);
635
Eli Bendersky0192ba32012-03-30 16:38:33 +0300636 /* element_gc_clear clears all references and deallocates extra
637 */
638 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639
640 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200641 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200642 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000643}
644
645/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000646
Serhiy Storchakacb985562015-05-04 15:32:48 +0300647/*[clinic input]
648_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000649
Serhiy Storchakacb985562015-05-04 15:32:48 +0300650 subelement: object(subclass_of='&Element_Type')
651 /
652
653[clinic start generated code]*/
654
655static PyObject *
656_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
657/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
658{
659 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660 return NULL;
661
662 Py_RETURN_NONE;
663}
664
Serhiy Storchakacb985562015-05-04 15:32:48 +0300665/*[clinic input]
666_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667
Serhiy Storchakacb985562015-05-04 15:32:48 +0300668[clinic start generated code]*/
669
670static PyObject *
671_elementtree_Element_clear_impl(ElementObject *self)
672/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
673{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300674 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->text));
678 self->text = Py_None;
679
680 Py_INCREF(Py_None);
681 Py_DECREF(JOIN_OBJ(self->tail));
682 self->tail = Py_None;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.__copy__
689
690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element___copy___impl(ElementObject *self)
694/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200696 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 ElementObject* element;
698
Eli Bendersky092af1f2012-03-04 07:14:03 +0200699 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800700 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000701 if (!element)
702 return NULL;
703
704 Py_DECREF(JOIN_OBJ(element->text));
705 element->text = self->text;
706 Py_INCREF(JOIN_OBJ(element->text));
707
708 Py_DECREF(JOIN_OBJ(element->tail));
709 element->tail = self->tail;
710 Py_INCREF(JOIN_OBJ(element->tail));
711
712 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000713 if (element_resize(element, self->extra->length) < 0) {
714 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000716 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717
718 for (i = 0; i < self->extra->length; i++) {
719 Py_INCREF(self->extra->children[i]);
720 element->extra->children[i] = self->extra->children[i];
721 }
722
723 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000724 }
725
726 return (PyObject*) element;
727}
728
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200729/* Helper for a deep copy. */
730LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
731
Serhiy Storchakacb985562015-05-04 15:32:48 +0300732/*[clinic input]
733_elementtree.Element.__deepcopy__
734
735 memo: object
736 /
737
738[clinic start generated code]*/
739
740static PyObject *
741_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
742/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200744 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000745 ElementObject* element;
746 PyObject* tag;
747 PyObject* attrib;
748 PyObject* text;
749 PyObject* tail;
750 PyObject* id;
751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000752 tag = deepcopy(self->tag, memo);
753 if (!tag)
754 return NULL;
755
756 if (self->extra) {
757 attrib = deepcopy(self->extra->attrib, memo);
758 if (!attrib) {
759 Py_DECREF(tag);
760 return NULL;
761 }
762 } else {
763 Py_INCREF(Py_None);
764 attrib = Py_None;
765 }
766
Eli Bendersky092af1f2012-03-04 07:14:03 +0200767 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000768
769 Py_DECREF(tag);
770 Py_DECREF(attrib);
771
772 if (!element)
773 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100774
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000775 text = deepcopy(JOIN_OBJ(self->text), memo);
776 if (!text)
777 goto error;
778 Py_DECREF(element->text);
779 element->text = JOIN_SET(text, JOIN_GET(self->text));
780
781 tail = deepcopy(JOIN_OBJ(self->tail), memo);
782 if (!tail)
783 goto error;
784 Py_DECREF(element->tail);
785 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
786
787 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000788 if (element_resize(element, self->extra->length) < 0)
789 goto error;
790
791 for (i = 0; i < self->extra->length; i++) {
792 PyObject* child = deepcopy(self->extra->children[i], memo);
793 if (!child) {
794 element->extra->length = i;
795 goto error;
796 }
797 element->extra->children[i] = child;
798 }
799
800 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000801 }
802
803 /* add object to memo dictionary (so deepcopy won't visit it again) */
Benjamin Petersonca470632016-09-06 13:47:26 -0700804 id = PyLong_FromSsize_t((uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000805 if (!id)
806 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807
808 i = PyDict_SetItem(memo, id, (PyObject*) element);
809
810 Py_DECREF(id);
811
812 if (i < 0)
813 goto error;
814
815 return (PyObject*) element;
816
817 error:
818 Py_DECREF(element);
819 return NULL;
820}
821
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200822LOCAL(PyObject *)
823deepcopy(PyObject *object, PyObject *memo)
824{
825 /* do a deep copy of the given object */
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200826 elementtreestate *st;
Victor Stinner7fbac452016-08-20 01:34:44 +0200827 PyObject *stack[2];
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200828
829 /* Fast paths */
830 if (object == Py_None || PyUnicode_CheckExact(object)) {
831 Py_INCREF(object);
832 return object;
833 }
834
835 if (Py_REFCNT(object) == 1) {
836 if (PyDict_CheckExact(object)) {
837 PyObject *key, *value;
838 Py_ssize_t pos = 0;
839 int simple = 1;
840 while (PyDict_Next(object, &pos, &key, &value)) {
841 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
842 simple = 0;
843 break;
844 }
845 }
846 if (simple)
847 return PyDict_Copy(object);
848 /* Fall through to general case */
849 }
850 else if (Element_CheckExact(object)) {
851 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
852 }
853 }
854
855 /* General case */
856 st = ET_STATE_GLOBAL;
857 if (!st->deepcopy_obj) {
858 PyErr_SetString(PyExc_RuntimeError,
859 "deepcopy helper not found");
860 return NULL;
861 }
862
Victor Stinner7fbac452016-08-20 01:34:44 +0200863 stack[0] = object;
864 stack[1] = memo;
Victor Stinner559bb6a2016-08-22 22:48:54 +0200865 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200866}
867
868
Serhiy Storchakacb985562015-05-04 15:32:48 +0300869/*[clinic input]
870_elementtree.Element.__sizeof__ -> Py_ssize_t
871
872[clinic start generated code]*/
873
874static Py_ssize_t
875_elementtree_Element___sizeof___impl(ElementObject *self)
876/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200878 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879 if (self->extra) {
880 result += sizeof(ElementObjectExtra);
881 if (self->extra->children != self->extra->_children)
882 result += sizeof(PyObject*) * self->extra->allocated;
883 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200885}
886
Eli Bendersky698bdb22013-01-10 06:01:06 -0800887/* dict keys for getstate/setstate. */
888#define PICKLED_TAG "tag"
889#define PICKLED_CHILDREN "_children"
890#define PICKLED_ATTRIB "attrib"
891#define PICKLED_TAIL "tail"
892#define PICKLED_TEXT "text"
893
894/* __getstate__ returns a fabricated instance dict as in the pure-Python
895 * Element implementation, for interoperability/interchangeability. This
896 * makes the pure-Python implementation details an API, but (a) there aren't
897 * any unnecessary structures there; and (b) it buys compatibility with 3.2
898 * pickles. See issue #16076.
899 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300900/*[clinic input]
901_elementtree.Element.__getstate__
902
903[clinic start generated code]*/
904
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906_elementtree_Element___getstate___impl(ElementObject *self)
907/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200909 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910 PyObject *instancedict = NULL, *children;
911
912 /* Build a list of children. */
913 children = PyList_New(self->extra ? self->extra->length : 0);
914 if (!children)
915 return NULL;
916 for (i = 0; i < PyList_GET_SIZE(children); i++) {
917 PyObject *child = self->extra->children[i];
918 Py_INCREF(child);
919 PyList_SET_ITEM(children, i, child);
920 }
921
922 /* Construct the state object. */
923 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
924 if (noattrib)
925 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
926 PICKLED_TAG, self->tag,
927 PICKLED_CHILDREN, children,
928 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700929 PICKLED_TEXT, JOIN_OBJ(self->text),
930 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931 else
932 instancedict = Py_BuildValue("{sOsOsOsOsO}",
933 PICKLED_TAG, self->tag,
934 PICKLED_CHILDREN, children,
935 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700936 PICKLED_TEXT, JOIN_OBJ(self->text),
937 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800938 if (instancedict) {
939 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800941 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 else {
943 for (i = 0; i < PyList_GET_SIZE(children); i++)
944 Py_DECREF(PyList_GET_ITEM(children, i));
945 Py_DECREF(children);
946
947 return NULL;
948 }
949}
950
951static PyObject *
952element_setstate_from_attributes(ElementObject *self,
953 PyObject *tag,
954 PyObject *attrib,
955 PyObject *text,
956 PyObject *tail,
957 PyObject *children)
958{
959 Py_ssize_t i, nchildren;
960
961 if (!tag) {
962 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
963 return NULL;
964 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200966 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300967 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->text);
970 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 _clear_joined_ptr(&self->tail);
974 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
975 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 /* Handle ATTRIB and CHILDREN. */
978 if (!children && !attrib)
979 Py_RETURN_NONE;
980
981 /* Compute 'nchildren'. */
982 if (children) {
983 if (!PyList_Check(children)) {
984 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
985 return NULL;
986 }
987 nchildren = PyList_Size(children);
988 }
989 else {
990 nchildren = 0;
991 }
992
993 /* Allocate 'extra'. */
994 if (element_resize(self, nchildren)) {
995 return NULL;
996 }
997 assert(self->extra && self->extra->allocated >= nchildren);
998
999 /* Copy children */
1000 for (i = 0; i < nchildren; i++) {
1001 self->extra->children[i] = PyList_GET_ITEM(children, i);
1002 Py_INCREF(self->extra->children[i]);
1003 }
1004
1005 self->extra->length = nchildren;
1006 self->extra->allocated = nchildren;
1007
1008 /* Stash attrib. */
1009 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001011 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 }
1013
1014 Py_RETURN_NONE;
1015}
1016
1017/* __setstate__ for Element instance from the Python implementation.
1018 * 'state' should be the instance dict.
1019 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021static PyObject *
1022element_setstate_from_Python(ElementObject *self, PyObject *state)
1023{
1024 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1025 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1026 PyObject *args;
1027 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030 tag = attrib = text = tail = children = NULL;
1031 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034
1035 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1036 &attrib, &text, &tail, &children))
1037 retval = element_setstate_from_attributes(self, tag, attrib, text,
1038 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001039 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001040 retval = NULL;
1041
1042 Py_DECREF(args);
1043 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044}
1045
Serhiy Storchakacb985562015-05-04 15:32:48 +03001046/*[clinic input]
1047_elementtree.Element.__setstate__
1048
1049 state: object
1050 /
1051
1052[clinic start generated code]*/
1053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1056/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057{
1058 if (!PyDict_CheckExact(state)) {
1059 PyErr_Format(PyExc_TypeError,
1060 "Don't know how to unpickle \"%.200R\" as an Element",
1061 state);
1062 return NULL;
1063 }
1064 else
1065 return element_setstate_from_Python(self, state);
1066}
1067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068LOCAL(int)
1069checkpath(PyObject* tag)
1070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 Py_ssize_t i;
1072 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 /* check if a tag contains an xpath character */
1075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076#define PATHCHAR(ch) \
1077 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1081 void *data = PyUnicode_DATA(tag);
1082 unsigned int kind = PyUnicode_KIND(tag);
1083 for (i = 0; i < len; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 return 1;
1091 }
1092 return 0;
1093 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001094 if (PyBytes_Check(tag)) {
1095 char *p = PyBytes_AS_STRING(tag);
1096 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (p[i] == '{')
1098 check = 0;
1099 else if (p[i] == '}')
1100 check = 1;
1101 else if (check && PATHCHAR(p[i]))
1102 return 1;
1103 }
1104 return 0;
1105 }
1106
1107 return 1; /* unknown type; might be path expression */
1108}
1109
Serhiy Storchakacb985562015-05-04 15:32:48 +03001110/*[clinic input]
1111_elementtree.Element.extend
1112
1113 elements: object
1114 /
1115
1116[clinic start generated code]*/
1117
1118static PyObject *
1119_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1120/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121{
1122 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 if (!seq) {
1127 PyErr_Format(
1128 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 );
1131 return NULL;
1132 }
1133
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 Py_INCREF(element);
1137 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001138 PyErr_Format(
1139 PyExc_TypeError,
1140 "expected an Element, not \"%.200s\"",
1141 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001142 Py_DECREF(seq);
1143 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001144 return NULL;
1145 }
1146
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147 if (element_add_subelement(self, element) < 0) {
1148 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001149 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 return NULL;
1151 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001152 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001153 }
1154
1155 Py_DECREF(seq);
1156
1157 Py_RETURN_NONE;
1158}
1159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160/*[clinic input]
1161_elementtree.Element.find
1162
1163 path: object
1164 namespaces: object = None
1165
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1170 PyObject *namespaces)
1171/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001173 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001175
Serhiy Storchakacb985562015-05-04 15:32:48 +03001176 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001178 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001179 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001181 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182
1183 if (!self->extra)
1184 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001188 int rc;
1189 if (!Element_CheckExact(item))
1190 continue;
1191 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001192 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_DECREF(item);
1196 if (rc < 0)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 }
1199
1200 Py_RETURN_NONE;
1201}
1202
Serhiy Storchakacb985562015-05-04 15:32:48 +03001203/*[clinic input]
1204_elementtree.Element.findtext
1205
1206 path: object
1207 default: object = None
1208 namespaces: object = None
1209
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1214 PyObject *default_value,
1215 PyObject *namespaces)
1216/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001218 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001219 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001220 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001223 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001224 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225 );
1226
1227 if (!self->extra) {
1228 Py_INCREF(default_value);
1229 return default_value;
1230 }
1231
1232 for (i = 0; i < self->extra->length; i++) {
1233 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001234 int rc;
1235 if (!Element_CheckExact(item))
1236 continue;
1237 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001238 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001239 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001240 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 if (text == Py_None) {
1242 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001243 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001244 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001245 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001247 return text;
1248 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 Py_DECREF(item);
1250 if (rc < 0)
1251 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 }
1253
1254 Py_INCREF(default_value);
1255 return default_value;
1256}
1257
Serhiy Storchakacb985562015-05-04 15:32:48 +03001258/*[clinic input]
1259_elementtree.Element.findall
1260
1261 path: object
1262 namespaces: object = None
1263
1264[clinic start generated code]*/
1265
1266static PyObject *
1267_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1268 PyObject *namespaces)
1269/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001270{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001271 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001273 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001274 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001276 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001277 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001279 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001281 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282
1283 out = PyList_New(0);
1284 if (!out)
1285 return NULL;
1286
1287 if (!self->extra)
1288 return out;
1289
1290 for (i = 0; i < self->extra->length; i++) {
1291 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 int rc;
1293 if (!Element_CheckExact(item))
1294 continue;
1295 Py_INCREF(item);
1296 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1297 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1298 Py_DECREF(item);
1299 Py_DECREF(out);
1300 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
1304
1305 return out;
1306}
1307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308/*[clinic input]
1309_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001310
Serhiy Storchakacb985562015-05-04 15:32:48 +03001311 path: object
1312 namespaces: object = None
1313
1314[clinic start generated code]*/
1315
1316static PyObject *
1317_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1318 PyObject *namespaces)
1319/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1320{
1321 PyObject* tag = path;
1322 _Py_IDENTIFIER(iterfind);
1323 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001325 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001326 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.get
1331
1332 key: object
1333 default: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1339 PyObject *default_value)
1340/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
1342 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 if (!self->extra || self->extra->attrib == Py_None)
1345 value = default_value;
1346 else {
1347 value = PyDict_GetItem(self->extra->attrib, key);
1348 if (!value)
1349 value = default_value;
1350 }
1351
1352 Py_INCREF(value);
1353 return value;
1354}
1355
Serhiy Storchakacb985562015-05-04 15:32:48 +03001356/*[clinic input]
1357_elementtree.Element.getchildren
1358
1359[clinic start generated code]*/
1360
1361static PyObject *
1362_elementtree_Element_getchildren_impl(ElementObject *self)
1363/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001364{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001365 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366 PyObject* list;
1367
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001368 /* FIXME: report as deprecated? */
1369
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 if (!self->extra)
1371 return PyList_New(0);
1372
1373 list = PyList_New(self->extra->length);
1374 if (!list)
1375 return NULL;
1376
1377 for (i = 0; i < self->extra->length; i++) {
1378 PyObject* item = self->extra->children[i];
1379 Py_INCREF(item);
1380 PyList_SET_ITEM(list, i, item);
1381 }
1382
1383 return list;
1384}
1385
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001386
Eli Bendersky64d11e62012-06-15 07:42:50 +03001387static PyObject *
1388create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1389
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.iter
1393
1394 tag: object = None
1395
1396[clinic start generated code]*/
1397
Eli Bendersky64d11e62012-06-15 07:42:50 +03001398static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001399_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1400/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001401{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001402 if (PyUnicode_Check(tag)) {
1403 if (PyUnicode_READY(tag) < 0)
1404 return NULL;
1405 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1406 tag = Py_None;
1407 }
1408 else if (PyBytes_Check(tag)) {
1409 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1410 tag = Py_None;
1411 }
1412
Eli Bendersky64d11e62012-06-15 07:42:50 +03001413 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001414}
1415
1416
Serhiy Storchakacb985562015-05-04 15:32:48 +03001417/*[clinic input]
1418_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001419
Serhiy Storchakacb985562015-05-04 15:32:48 +03001420[clinic start generated code]*/
1421
1422static PyObject *
1423_elementtree_Element_itertext_impl(ElementObject *self)
1424/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1425{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001426 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427}
1428
Eli Bendersky64d11e62012-06-15 07:42:50 +03001429
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001431element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001433 ElementObject* self = (ElementObject*) self_;
1434
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001435 if (!self->extra || index < 0 || index >= self->extra->length) {
1436 PyErr_SetString(
1437 PyExc_IndexError,
1438 "child index out of range"
1439 );
1440 return NULL;
1441 }
1442
1443 Py_INCREF(self->extra->children[index]);
1444 return self->extra->children[index];
1445}
1446
Serhiy Storchakacb985562015-05-04 15:32:48 +03001447/*[clinic input]
1448_elementtree.Element.insert
1449
1450 index: Py_ssize_t
1451 subelement: object(subclass_of='&Element_Type')
1452 /
1453
1454[clinic start generated code]*/
1455
1456static PyObject *
1457_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1458 PyObject *subelement)
1459/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001461 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462
Victor Stinner5f0af232013-07-11 23:01:36 +02001463 if (!self->extra) {
1464 if (create_extra(self, NULL) < 0)
1465 return NULL;
1466 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001467
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001468 if (index < 0) {
1469 index += self->extra->length;
1470 if (index < 0)
1471 index = 0;
1472 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001473 if (index > self->extra->length)
1474 index = self->extra->length;
1475
1476 if (element_resize(self, 1) < 0)
1477 return NULL;
1478
1479 for (i = self->extra->length; i > index; i--)
1480 self->extra->children[i] = self->extra->children[i-1];
1481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482 Py_INCREF(subelement);
1483 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484
1485 self->extra->length++;
1486
1487 Py_RETURN_NONE;
1488}
1489
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490/*[clinic input]
1491_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492
Serhiy Storchakacb985562015-05-04 15:32:48 +03001493[clinic start generated code]*/
1494
1495static PyObject *
1496_elementtree_Element_items_impl(ElementObject *self)
1497/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1498{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 if (!self->extra || self->extra->attrib == Py_None)
1500 return PyList_New(0);
1501
1502 return PyDict_Items(self->extra->attrib);
1503}
1504
Serhiy Storchakacb985562015-05-04 15:32:48 +03001505/*[clinic input]
1506_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507
Serhiy Storchakacb985562015-05-04 15:32:48 +03001508[clinic start generated code]*/
1509
1510static PyObject *
1511_elementtree_Element_keys_impl(ElementObject *self)
1512/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1513{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514 if (!self->extra || self->extra->attrib == Py_None)
1515 return PyList_New(0);
1516
1517 return PyDict_Keys(self->extra->attrib);
1518}
1519
Martin v. Löwis18e16552006-02-15 17:27:45 +00001520static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521element_length(ElementObject* self)
1522{
1523 if (!self->extra)
1524 return 0;
1525
1526 return self->extra->length;
1527}
1528
Serhiy Storchakacb985562015-05-04 15:32:48 +03001529/*[clinic input]
1530_elementtree.Element.makeelement
1531
1532 tag: object
1533 attrib: object
1534 /
1535
1536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1540 PyObject *attrib)
1541/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542{
1543 PyObject* elem;
1544
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545 attrib = PyDict_Copy(attrib);
1546 if (!attrib)
1547 return NULL;
1548
Eli Bendersky092af1f2012-03-04 07:14:03 +02001549 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001550
1551 Py_DECREF(attrib);
1552
1553 return elem;
1554}
1555
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556/*[clinic input]
1557_elementtree.Element.remove
1558
1559 subelement: object(subclass_of='&Element_Type')
1560 /
1561
1562[clinic start generated code]*/
1563
1564static PyObject *
1565_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1566/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001568 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001569 int rc;
1570 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 if (!self->extra) {
1573 /* element has no children, so raise exception */
1574 PyErr_SetString(
1575 PyExc_ValueError,
1576 "list.remove(x): x not in list"
1577 );
1578 return NULL;
1579 }
1580
1581 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001582 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001584 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001585 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001587 if (rc < 0)
1588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 }
1590
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001591 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001592 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593 PyErr_SetString(
1594 PyExc_ValueError,
1595 "list.remove(x): x not in list"
1596 );
1597 return NULL;
1598 }
1599
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001600 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601
1602 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 for (; i < self->extra->length; i++)
1604 self->extra->children[i] = self->extra->children[i+1];
1605
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001606 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 Py_RETURN_NONE;
1608}
1609
1610static PyObject*
1611element_repr(ElementObject* self)
1612{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001613 int status;
1614
1615 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001616 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001617
1618 status = Py_ReprEnter((PyObject *)self);
1619 if (status == 0) {
1620 PyObject *res;
1621 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1622 Py_ReprLeave((PyObject *)self);
1623 return res;
1624 }
1625 if (status > 0)
1626 PyErr_Format(PyExc_RuntimeError,
1627 "reentrant call inside %s.__repr__",
1628 Py_TYPE(self)->tp_name);
1629 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630}
1631
Serhiy Storchakacb985562015-05-04 15:32:48 +03001632/*[clinic input]
1633_elementtree.Element.set
1634
1635 key: object
1636 value: object
1637 /
1638
1639[clinic start generated code]*/
1640
1641static PyObject *
1642_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1643 PyObject *value)
1644/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645{
1646 PyObject* attrib;
1647
Victor Stinner5f0af232013-07-11 23:01:36 +02001648 if (!self->extra) {
1649 if (create_extra(self, NULL) < 0)
1650 return NULL;
1651 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001652
1653 attrib = element_get_attrib(self);
1654 if (!attrib)
1655 return NULL;
1656
1657 if (PyDict_SetItem(attrib, key, value) < 0)
1658 return NULL;
1659
1660 Py_RETURN_NONE;
1661}
1662
1663static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001664element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001666 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001667 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668 PyObject* old;
1669
1670 if (!self->extra || index < 0 || index >= self->extra->length) {
1671 PyErr_SetString(
1672 PyExc_IndexError,
1673 "child assignment index out of range");
1674 return -1;
1675 }
1676
1677 old = self->extra->children[index];
1678
1679 if (item) {
1680 Py_INCREF(item);
1681 self->extra->children[index] = item;
1682 } else {
1683 self->extra->length--;
1684 for (i = index; i < self->extra->length; i++)
1685 self->extra->children[i] = self->extra->children[i+1];
1686 }
1687
1688 Py_DECREF(old);
1689
1690 return 0;
1691}
1692
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001693static PyObject*
1694element_subscr(PyObject* self_, PyObject* item)
1695{
1696 ElementObject* self = (ElementObject*) self_;
1697
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001698 if (PyIndex_Check(item)) {
1699 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001700
1701 if (i == -1 && PyErr_Occurred()) {
1702 return NULL;
1703 }
1704 if (i < 0 && self->extra)
1705 i += self->extra->length;
1706 return element_getitem(self_, i);
1707 }
1708 else if (PySlice_Check(item)) {
1709 Py_ssize_t start, stop, step, slicelen, cur, i;
1710 PyObject* list;
1711
1712 if (!self->extra)
1713 return PyList_New(0);
1714
Serhiy Storchakac26b19d2017-04-08 11:18:14 +03001715 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001716 return NULL;
1717 }
Serhiy Storchakac26b19d2017-04-08 11:18:14 +03001718 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1719 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001720
1721 if (slicelen <= 0)
1722 return PyList_New(0);
1723 else {
1724 list = PyList_New(slicelen);
1725 if (!list)
1726 return NULL;
1727
1728 for (cur = start, i = 0; i < slicelen;
1729 cur += step, i++) {
1730 PyObject* item = self->extra->children[cur];
1731 Py_INCREF(item);
1732 PyList_SET_ITEM(list, i, item);
1733 }
1734
1735 return list;
1736 }
1737 }
1738 else {
1739 PyErr_SetString(PyExc_TypeError,
1740 "element indices must be integers");
1741 return NULL;
1742 }
1743}
1744
1745static int
1746element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1747{
1748 ElementObject* self = (ElementObject*) self_;
1749
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001750 if (PyIndex_Check(item)) {
1751 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001752
1753 if (i == -1 && PyErr_Occurred()) {
1754 return -1;
1755 }
1756 if (i < 0 && self->extra)
1757 i += self->extra->length;
1758 return element_setitem(self_, i, value);
1759 }
1760 else if (PySlice_Check(item)) {
1761 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1762
1763 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001764 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001765
Victor Stinner5f0af232013-07-11 23:01:36 +02001766 if (!self->extra) {
1767 if (create_extra(self, NULL) < 0)
1768 return -1;
1769 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001770
Serhiy Storchakac26b19d2017-04-08 11:18:14 +03001771 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772 return -1;
1773 }
Serhiy Storchakac26b19d2017-04-08 11:18:14 +03001774 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1775 step);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001776
Eli Bendersky865756a2012-03-09 13:38:15 +02001777 if (value == NULL) {
1778 /* Delete slice */
1779 size_t cur;
1780 Py_ssize_t i;
1781
1782 if (slicelen <= 0)
1783 return 0;
1784
1785 /* Since we're deleting, the direction of the range doesn't matter,
1786 * so for simplicity make it always ascending.
1787 */
1788 if (step < 0) {
1789 stop = start + 1;
1790 start = stop + step * (slicelen - 1) - 1;
1791 step = -step;
1792 }
1793
Benjamin Peterson2f8bfef2016-09-07 09:26:18 -07001794 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
Eli Bendersky865756a2012-03-09 13:38:15 +02001795
1796 /* recycle is a list that will contain all the children
1797 * scheduled for removal.
1798 */
1799 if (!(recycle = PyList_New(slicelen))) {
1800 PyErr_NoMemory();
1801 return -1;
1802 }
1803
1804 /* This loop walks over all the children that have to be deleted,
1805 * with cur pointing at them. num_moved is the amount of children
1806 * until the next deleted child that have to be "shifted down" to
1807 * occupy the deleted's places.
1808 * Note that in the ith iteration, shifting is done i+i places down
1809 * because i children were already removed.
1810 */
1811 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1812 /* Compute how many children have to be moved, clipping at the
1813 * list end.
1814 */
1815 Py_ssize_t num_moved = step - 1;
1816 if (cur + step >= (size_t)self->extra->length) {
1817 num_moved = self->extra->length - cur - 1;
1818 }
1819
1820 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1821
1822 memmove(
1823 self->extra->children + cur - i,
1824 self->extra->children + cur + 1,
1825 num_moved * sizeof(PyObject *));
1826 }
1827
1828 /* Leftover "tail" after the last removed child */
1829 cur = start + (size_t)slicelen * step;
1830 if (cur < (size_t)self->extra->length) {
1831 memmove(
1832 self->extra->children + cur - slicelen,
1833 self->extra->children + cur,
1834 (self->extra->length - cur) * sizeof(PyObject *));
1835 }
1836
1837 self->extra->length -= slicelen;
1838
1839 /* Discard the recycle list with all the deleted sub-elements */
1840 Py_XDECREF(recycle);
1841 return 0;
1842 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001843
1844 /* A new slice is actually being assigned */
1845 seq = PySequence_Fast(value, "");
1846 if (!seq) {
1847 PyErr_Format(
1848 PyExc_TypeError,
1849 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1850 );
1851 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001852 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001853 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001854
1855 if (step != 1 && newlen != slicelen)
1856 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001857 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001858 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 "attempt to assign sequence of size %zd "
1860 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001861 newlen, slicelen
1862 );
1863 return -1;
1864 }
1865
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001866 /* Resize before creating the recycle bin, to prevent refleaks. */
1867 if (newlen > slicelen) {
1868 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870 return -1;
1871 }
1872 }
1873
1874 if (slicelen > 0) {
1875 /* to avoid recursive calls to this method (via decref), move
1876 old items to the recycle bin here, and get rid of them when
1877 we're done modifying the element */
1878 recycle = PyList_New(slicelen);
1879 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001880 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881 return -1;
1882 }
1883 for (cur = start, i = 0; i < slicelen;
1884 cur += step, i++)
1885 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1886 }
1887
1888 if (newlen < slicelen) {
1889 /* delete slice */
1890 for (i = stop; i < self->extra->length; i++)
1891 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1892 } else if (newlen > slicelen) {
1893 /* insert slice */
1894 for (i = self->extra->length-1; i >= stop; i--)
1895 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1896 }
1897
1898 /* replace the slice */
1899 for (cur = start, i = 0; i < newlen;
1900 cur += step, i++) {
1901 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1902 Py_INCREF(element);
1903 self->extra->children[cur] = element;
1904 }
1905
1906 self->extra->length += newlen - slicelen;
1907
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001908 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001909
1910 /* discard the recycle bin, and everything in it */
1911 Py_XDECREF(recycle);
1912
1913 return 0;
1914 }
1915 else {
1916 PyErr_SetString(PyExc_TypeError,
1917 "element indices must be integers");
1918 return -1;
1919 }
1920}
1921
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001922static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001923element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001925 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001926 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001927 return res;
1928}
1929
Serhiy Storchakadde08152015-11-25 15:28:13 +02001930static PyObject*
1931element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001933 PyObject *res = element_get_text(self);
1934 Py_XINCREF(res);
1935 return res;
1936}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001937
Serhiy Storchakadde08152015-11-25 15:28:13 +02001938static PyObject*
1939element_tail_getter(ElementObject *self, void *closure)
1940{
1941 PyObject *res = element_get_tail(self);
1942 Py_XINCREF(res);
1943 return res;
1944}
1945
1946static PyObject*
1947element_attrib_getter(ElementObject *self, void *closure)
1948{
1949 PyObject *res;
1950 if (!self->extra) {
1951 if (create_extra(self, NULL) < 0)
1952 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001953 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001954 res = element_get_attrib(self);
1955 Py_XINCREF(res);
1956 return res;
1957}
Victor Stinner4d463432013-07-11 23:05:03 +02001958
/* Setter validation: a NULL value is rejected with AttributeError
   ("can't delete element attribute") by returning -1 from the enclosing
   setter.  The body is wrapped in do/while(0) so that the expansion is a
   single statement and is safe next to if/else. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1967
Serhiy Storchakadde08152015-11-25 15:28:13 +02001968static int
1969element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1970{
1971 _VALIDATE_ATTR_VALUE(value);
1972 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001973 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001974 return 0;
1975}
1976
1977static int
1978element_text_setter(ElementObject *self, PyObject *value, void *closure)
1979{
1980 _VALIDATE_ATTR_VALUE(value);
1981 Py_INCREF(value);
1982 Py_DECREF(JOIN_OBJ(self->text));
1983 self->text = value;
1984 return 0;
1985}
1986
1987static int
1988element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1989{
1990 _VALIDATE_ATTR_VALUE(value);
1991 Py_INCREF(value);
1992 Py_DECREF(JOIN_OBJ(self->tail));
1993 self->tail = value;
1994 return 0;
1995}
1996
1997static int
1998element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1999{
2000 _VALIDATE_ATTR_VALUE(value);
2001 if (!self->extra) {
2002 if (create_extra(self, NULL) < 0)
2003 return -1;
2004 }
2005 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002006 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002007 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002008}
2009
2010static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002012 0, /* sq_concat */
2013 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002014 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002015 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002016 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002017 0,
2018};
2019
Eli Bendersky64d11e62012-06-15 07:42:50 +03002020/******************************* Element iterator ****************************/
2021
2022/* ElementIterObject represents the iteration state over an XML element in
2023 * pre-order traversal. To keep track of which sub-element should be returned
2024 * next, a stack of parents is maintained. This is a standard stack-based
2025 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002026 * The stack is managed using a continuous array.
2027 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002028 * the current one is exhausted, and the next child to examine in that parent.
2029 */
2030typedef struct ParentLocator_t {
2031 ElementObject *parent;
2032 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002033} ParentLocator;
2034
2035typedef struct {
2036 PyObject_HEAD
2037 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002038 Py_ssize_t parent_stack_used;
2039 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002040 ElementObject *root_element;
2041 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002042 int gettext;
2043} ElementIterObject;
2044
2045
2046static void
2047elementiter_dealloc(ElementIterObject *it)
2048{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002049 Py_ssize_t i = it->parent_stack_used;
2050 it->parent_stack_used = 0;
2051 while (i--)
2052 Py_XDECREF(it->parent_stack[i].parent);
2053 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002054
2055 Py_XDECREF(it->sought_tag);
2056 Py_XDECREF(it->root_element);
2057
2058 PyObject_GC_UnTrack(it);
2059 PyObject_GC_Del(it);
2060}
2061
2062static int
2063elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2064{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002065 Py_ssize_t i = it->parent_stack_used;
2066 while (i--)
2067 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002068
2069 Py_VISIT(it->root_element);
2070 Py_VISIT(it->sought_tag);
2071 return 0;
2072}
2073
2074/* Helper function for elementiter_next. Add a new parent to the parent stack.
2075 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002076static int
2077parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002078{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002079 ParentLocator *item;
2080
2081 if (it->parent_stack_used >= it->parent_stack_size) {
2082 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2083 ParentLocator *parent_stack = it->parent_stack;
2084 PyMem_Resize(parent_stack, ParentLocator, new_size);
2085 if (parent_stack == NULL)
2086 return -1;
2087 it->parent_stack = parent_stack;
2088 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002089 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002090 item = it->parent_stack + it->parent_stack_used++;
2091 Py_INCREF(parent);
2092 item->parent = parent;
2093 item->child_index = 0;
2094 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002095}
2096
2097static PyObject *
2098elementiter_next(ElementIterObject *it)
2099{
2100 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002101 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102 * A short note on gettext: this function serves both the iter() and
2103 * itertext() methods to avoid code duplication. However, there are a few
2104 * small differences in the way these iterations work. Namely:
2105 * - itertext() only yields text from nodes that have it, and continues
2106 * iterating when a node doesn't have text (so it doesn't return any
2107 * node like iter())
2108 * - itertext() also has to handle tail, after finishing with all the
2109 * children of a node.
2110 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002111 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002112 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002113 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114
2115 while (1) {
2116 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002117 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118 * iterator is exhausted.
2119 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002120 if (!it->parent_stack_used) {
2121 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002122 PyErr_SetNone(PyExc_StopIteration);
2123 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002124 }
2125
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002126 elem = it->root_element; /* steals a reference */
2127 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002128 }
2129 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002130 /* See if there are children left to traverse in the current parent. If
2131 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002132 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002133 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2134 Py_ssize_t child_index = item->child_index;
2135 ElementObjectExtra *extra;
2136 elem = item->parent;
2137 extra = elem->extra;
2138 if (!extra || child_index >= extra->length) {
2139 it->parent_stack_used--;
2140 /* Note that extra condition on it->parent_stack_used here;
2141 * this is because itertext() is supposed to only return *inner*
2142 * text, not text following the element it began iteration with.
2143 */
2144 if (it->gettext && it->parent_stack_used) {
2145 text = element_get_tail(elem);
2146 goto gettext;
2147 }
2148 Py_DECREF(elem);
2149 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002150 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002152 if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) {
2153 PyErr_Format(PyExc_AttributeError,
2154 "'%.100s' object has no attribute 'iter'",
2155 Py_TYPE(extra->children[child_index])->tp_name);
2156 return NULL;
2157 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002158 elem = (ElementObject *)extra->children[child_index];
2159 item->child_index++;
2160 Py_INCREF(elem);
2161 }
2162
2163 if (parent_stack_push_new(it, elem) < 0) {
2164 Py_DECREF(elem);
2165 PyErr_NoMemory();
2166 return NULL;
2167 }
2168 if (it->gettext) {
2169 text = element_get_text(elem);
2170 goto gettext;
2171 }
2172
2173 if (it->sought_tag == Py_None)
2174 return (PyObject *)elem;
2175
2176 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2177 if (rc > 0)
2178 return (PyObject *)elem;
2179
2180 Py_DECREF(elem);
2181 if (rc < 0)
2182 return NULL;
2183 continue;
2184
2185gettext:
2186 if (!text) {
2187 Py_DECREF(elem);
2188 return NULL;
2189 }
2190 if (text == Py_None) {
2191 Py_DECREF(elem);
2192 }
2193 else {
2194 Py_INCREF(text);
2195 Py_DECREF(elem);
2196 rc = PyObject_IsTrue(text);
2197 if (rc > 0)
2198 return text;
2199 Py_DECREF(text);
2200 if (rc < 0)
2201 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002202 }
2203 }
2204
2205 return NULL;
2206}
2207
2208
2209static PyTypeObject ElementIter_Type = {
2210 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002211 /* Using the module's name since the pure-Python implementation does not
2212 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002213 "_elementtree._element_iterator", /* tp_name */
2214 sizeof(ElementIterObject), /* tp_basicsize */
2215 0, /* tp_itemsize */
2216 /* methods */
2217 (destructor)elementiter_dealloc, /* tp_dealloc */
2218 0, /* tp_print */
2219 0, /* tp_getattr */
2220 0, /* tp_setattr */
2221 0, /* tp_reserved */
2222 0, /* tp_repr */
2223 0, /* tp_as_number */
2224 0, /* tp_as_sequence */
2225 0, /* tp_as_mapping */
2226 0, /* tp_hash */
2227 0, /* tp_call */
2228 0, /* tp_str */
2229 0, /* tp_getattro */
2230 0, /* tp_setattro */
2231 0, /* tp_as_buffer */
2232 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2233 0, /* tp_doc */
2234 (traverseproc)elementiter_traverse, /* tp_traverse */
2235 0, /* tp_clear */
2236 0, /* tp_richcompare */
2237 0, /* tp_weaklistoffset */
2238 PyObject_SelfIter, /* tp_iter */
2239 (iternextfunc)elementiter_next, /* tp_iternext */
2240 0, /* tp_methods */
2241 0, /* tp_members */
2242 0, /* tp_getset */
2243 0, /* tp_base */
2244 0, /* tp_dict */
2245 0, /* tp_descr_get */
2246 0, /* tp_descr_set */
2247 0, /* tp_dictoffset */
2248 0, /* tp_init */
2249 0, /* tp_alloc */
2250 0, /* tp_new */
2251};
2252
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002253#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002254
2255static PyObject *
2256create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2257{
2258 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002259
2260 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2261 if (!it)
2262 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002263
Victor Stinner4d463432013-07-11 23:05:03 +02002264 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002265 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002266 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002267 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002268 it->root_element = self;
2269
Eli Bendersky64d11e62012-06-15 07:42:50 +03002270 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002271
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002272 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002273 if (it->parent_stack == NULL) {
2274 Py_DECREF(it);
2275 PyErr_NoMemory();
2276 return NULL;
2277 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002278 it->parent_stack_used = 0;
2279 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002280
Eli Bendersky64d11e62012-06-15 07:42:50 +03002281 return (PyObject *)it;
2282}
2283
2284
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285/* ==================================================================== */
2286/* the tree builder type */
2287
2288typedef struct {
2289 PyObject_HEAD
2290
Eli Bendersky58d548d2012-05-29 15:45:16 +03002291 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002292
Antoine Pitrouee329312012-10-04 19:53:29 +02002293 PyObject *this; /* current node */
2294 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295
Eli Bendersky58d548d2012-05-29 15:45:16 +03002296 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297
Eli Bendersky58d548d2012-05-29 15:45:16 +03002298 PyObject *stack; /* element stack */
2299 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300
Eli Bendersky48d358b2012-05-30 17:57:50 +03002301 PyObject *element_factory;
2302
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002303 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002304 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002305 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2306 PyObject *end_event_obj;
2307 PyObject *start_ns_event_obj;
2308 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309} TreeBuilderObject;
2310
Christian Heimes90aa7642007-12-19 02:45:37 +00002311#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002312
2313/* -------------------------------------------------------------------- */
2314/* constructor and destructor */
2315
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316static PyObject *
2317treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002318{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002319 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2320 if (t != NULL) {
2321 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002322
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002324 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002325 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002326 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002327
Eli Bendersky58d548d2012-05-29 15:45:16 +03002328 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002329 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002330 t->stack = PyList_New(20);
2331 if (!t->stack) {
2332 Py_DECREF(t->this);
2333 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002334 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002335 return NULL;
2336 }
2337 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002339 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002340 t->start_event_obj = t->end_event_obj = NULL;
2341 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2342 }
2343 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002344}
2345
Serhiy Storchakacb985562015-05-04 15:32:48 +03002346/*[clinic input]
2347_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002348
Serhiy Storchakacb985562015-05-04 15:32:48 +03002349 element_factory: object = NULL
2350
2351[clinic start generated code]*/
2352
2353static int
2354_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2355 PyObject *element_factory)
2356/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2357{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002358 if (element_factory) {
2359 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002360 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002361 }
2362
Eli Bendersky58d548d2012-05-29 15:45:16 +03002363 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002364}
2365
Eli Bendersky48d358b2012-05-30 17:57:50 +03002366static int
2367treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2368{
2369 Py_VISIT(self->root);
2370 Py_VISIT(self->this);
2371 Py_VISIT(self->last);
2372 Py_VISIT(self->data);
2373 Py_VISIT(self->stack);
2374 Py_VISIT(self->element_factory);
2375 return 0;
2376}
2377
2378static int
2379treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002381 Py_CLEAR(self->end_ns_event_obj);
2382 Py_CLEAR(self->start_ns_event_obj);
2383 Py_CLEAR(self->end_event_obj);
2384 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002385 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002386 Py_CLEAR(self->stack);
2387 Py_CLEAR(self->data);
2388 Py_CLEAR(self->last);
2389 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002390 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002391 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002392 return 0;
2393}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002394
Eli Bendersky48d358b2012-05-30 17:57:50 +03002395static void
2396treebuilder_dealloc(TreeBuilderObject *self)
2397{
2398 PyObject_GC_UnTrack(self);
2399 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002400 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002401}
2402
2403/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002404/* helpers for handling of arbitrary element-like objects */
2405
2406static int
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002407treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
Antoine Pitrouee329312012-10-04 19:53:29 +02002408 PyObject **dest, _Py_Identifier *name)
2409{
2410 if (Element_CheckExact(element)) {
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002411 PyObject *tmp = JOIN_OBJ(*dest);
2412 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2413 *data = NULL;
2414 Py_DECREF(tmp);
Antoine Pitrouee329312012-10-04 19:53:29 +02002415 return 0;
2416 }
2417 else {
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002418 PyObject *joined = list_join(*data);
Antoine Pitrouee329312012-10-04 19:53:29 +02002419 int r;
2420 if (joined == NULL)
2421 return -1;
2422 r = _PyObject_SetAttrId(element, name, joined);
2423 Py_DECREF(joined);
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002424 if (r < 0)
2425 return -1;
2426 Py_CLEAR(*data);
2427 return 0;
Antoine Pitrouee329312012-10-04 19:53:29 +02002428 }
2429}
2430
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002431LOCAL(int)
2432treebuilder_flush_data(TreeBuilderObject* self)
Antoine Pitrouee329312012-10-04 19:53:29 +02002433{
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002434 PyObject *element = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002435
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002436 if (!self->data) {
2437 return 0;
2438 }
2439
2440 if (self->this == element) {
2441 _Py_IDENTIFIER(text);
2442 return treebuilder_set_element_text_or_tail(
2443 element, &self->data,
2444 &((ElementObject *) element)->text, &PyId_text);
2445 }
2446 else {
2447 _Py_IDENTIFIER(tail);
2448 return treebuilder_set_element_text_or_tail(
2449 element, &self->data,
2450 &((ElementObject *) element)->tail, &PyId_tail);
2451 }
Antoine Pitrouee329312012-10-04 19:53:29 +02002452}
2453
2454static int
2455treebuilder_add_subelement(PyObject *element, PyObject *child)
2456{
2457 _Py_IDENTIFIER(append);
2458 if (Element_CheckExact(element)) {
2459 ElementObject *elem = (ElementObject *) element;
2460 return element_add_subelement(elem, child);
2461 }
2462 else {
2463 PyObject *res;
2464 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2465 if (res == NULL)
2466 return -1;
2467 Py_DECREF(res);
2468 return 0;
2469 }
2470}
2471
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002472LOCAL(int)
2473treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2474 PyObject *node)
2475{
2476 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002477 PyObject *res;
2478 PyObject *event = PyTuple_Pack(2, action, node);
2479 if (event == NULL)
2480 return -1;
2481 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2482 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002483 if (res == NULL)
2484 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002485 Py_DECREF(res);
2486 }
2487 return 0;
2488}
2489
Antoine Pitrouee329312012-10-04 19:53:29 +02002490/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002491/* handlers */
2492
2493LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002494treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2495 PyObject* attrib)
2496{
2497 PyObject* node;
2498 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002499 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002501 if (treebuilder_flush_data(self) < 0) {
2502 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002503 }
2504
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002505 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002506 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002507 } else if (attrib == Py_None) {
2508 attrib = PyDict_New();
2509 if (!attrib)
2510 return NULL;
2511 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2512 Py_DECREF(attrib);
2513 }
2514 else {
2515 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002516 }
2517 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002519 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520
Antoine Pitrouee329312012-10-04 19:53:29 +02002521 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522
2523 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002524 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002525 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526 } else {
2527 if (self->root) {
2528 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002529 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 "multiple elements on top level"
2531 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002532 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 }
2534 Py_INCREF(node);
2535 self->root = node;
2536 }
2537
2538 if (self->index < PyList_GET_SIZE(self->stack)) {
2539 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002540 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541 Py_INCREF(this);
2542 } else {
2543 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002544 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545 }
2546 self->index++;
2547
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002549 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002551 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002553 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2554 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555
2556 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002557
2558 error:
2559 Py_DECREF(node);
2560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561}
2562
2563LOCAL(PyObject*)
2564treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2565{
2566 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002567 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002568 /* ignore calls to data before the first call to start */
2569 Py_RETURN_NONE;
2570 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002571 /* store the first item as is */
2572 Py_INCREF(data); self->data = data;
2573 } else {
2574 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002575 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2576 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002577 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002578 /* expat often generates single character data sections; handle
2579 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002580 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2581 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002582 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002583 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002584 } else if (PyList_CheckExact(self->data)) {
2585 if (PyList_Append(self->data, data) < 0)
2586 return NULL;
2587 } else {
2588 PyObject* list = PyList_New(2);
2589 if (!list)
2590 return NULL;
2591 PyList_SET_ITEM(list, 0, self->data);
2592 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2593 self->data = list;
2594 }
2595 }
2596
2597 Py_RETURN_NONE;
2598}
2599
2600LOCAL(PyObject*)
2601treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2602{
2603 PyObject* item;
2604
Serhiy Storchakaa6b4e192017-03-30 18:08:21 +03002605 if (treebuilder_flush_data(self) < 0) {
2606 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002607 }
2608
2609 if (self->index == 0) {
2610 PyErr_SetString(
2611 PyExc_IndexError,
2612 "pop from empty stack"
2613 );
2614 return NULL;
2615 }
2616
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002617 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002618 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002619 self->index--;
2620 self->this = PyList_GET_ITEM(self->stack, self->index);
2621 Py_INCREF(self->this);
2622 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002624 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2625 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626
2627 Py_INCREF(self->last);
2628 return (PyObject*) self->last;
2629}
2630
/* -------------------------------------------------------------------- */
/* methods (in alphabetical order) */
2633
Serhiy Storchakacb985562015-05-04 15:32:48 +03002634/*[clinic input]
2635_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636
Serhiy Storchakacb985562015-05-04 15:32:48 +03002637 data: object
2638 /
2639
2640[clinic start generated code]*/
2641
2642static PyObject *
2643_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2644/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2645{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646 return treebuilder_handle_data(self, data);
2647}
2648
Serhiy Storchakacb985562015-05-04 15:32:48 +03002649/*[clinic input]
2650_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651
Serhiy Storchakacb985562015-05-04 15:32:48 +03002652 tag: object
2653 /
2654
2655[clinic start generated code]*/
2656
2657static PyObject *
2658_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2659/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2660{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661 return treebuilder_handle_end(self, tag);
2662}
2663
2664LOCAL(PyObject*)
2665treebuilder_done(TreeBuilderObject* self)
2666{
2667 PyObject* res;
2668
2669 /* FIXME: check stack size? */
2670
2671 if (self->root)
2672 res = self->root;
2673 else
2674 res = Py_None;
2675
2676 Py_INCREF(res);
2677 return res;
2678}
2679
Serhiy Storchakacb985562015-05-04 15:32:48 +03002680/*[clinic input]
2681_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682
Serhiy Storchakacb985562015-05-04 15:32:48 +03002683[clinic start generated code]*/
2684
2685static PyObject *
2686_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2687/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2688{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002689 return treebuilder_done(self);
2690}
2691
Serhiy Storchakacb985562015-05-04 15:32:48 +03002692/*[clinic input]
2693_elementtree.TreeBuilder.start
2694
2695 tag: object
2696 attrs: object = None
2697 /
2698
2699[clinic start generated code]*/
2700
2701static PyObject *
2702_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2703 PyObject *attrs)
2704/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002706 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707}
2708
/* ==================================================================== */
/* the expat interface */
2711
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002712#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002714
2715/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2716 * cached globally without being in per-module state.
2717 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002718static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720
Eli Bendersky52467b12012-06-01 07:13:08 +03002721static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2722 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724typedef struct {
2725 PyObject_HEAD
2726
2727 XML_Parser parser;
2728
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002729 PyObject *target;
2730 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002732 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002734 PyObject *handle_start;
2735 PyObject *handle_data;
2736 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002738 PyObject *handle_comment;
2739 PyObject *handle_pi;
2740 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002742 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002743
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744} XMLParserObject;
2745
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002746static PyObject*
2747_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2748static PyObject *
2749_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2750 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002751
/* helpers */
2753
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754LOCAL(PyObject*)
2755makeuniversal(XMLParserObject* self, const char* string)
2756{
2757 /* convert a UTF-8 tag/attribute name from the expat parser
2758 to a universal name string */
2759
Antoine Pitrouc1948842012-10-01 23:40:37 +02002760 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 PyObject* key;
2762 PyObject* value;
2763
2764 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002765 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766 if (!key)
2767 return NULL;
2768
2769 value = PyDict_GetItem(self->names, key);
2770
2771 if (value) {
2772 Py_INCREF(value);
2773 } else {
2774 /* new name. convert to universal name, and decode as
2775 necessary */
2776
2777 PyObject* tag;
2778 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002779 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780
2781 /* look for namespace separator */
2782 for (i = 0; i < size; i++)
2783 if (string[i] == '}')
2784 break;
2785 if (i != size) {
2786 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002787 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002788 if (tag == NULL) {
2789 Py_DECREF(key);
2790 return NULL;
2791 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002792 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 p[0] = '{';
2794 memcpy(p+1, string, size);
2795 size++;
2796 } else {
2797 /* plain name; use key as tag */
2798 Py_INCREF(key);
2799 tag = key;
2800 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002801
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002803 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002804 value = PyUnicode_DecodeUTF8(p, size, "strict");
2805 Py_DECREF(tag);
2806 if (!value) {
2807 Py_DECREF(key);
2808 return NULL;
2809 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002810
2811 /* add to names dictionary */
2812 if (PyDict_SetItem(self->names, key, value) < 0) {
2813 Py_DECREF(key);
2814 Py_DECREF(value);
2815 return NULL;
2816 }
2817 }
2818
2819 Py_DECREF(key);
2820 return value;
2821}
2822
Eli Bendersky5b77d812012-03-16 08:20:05 +02002823/* Set the ParseError exception with the given parameters.
2824 * If message is not NULL, it's used as the error string. Otherwise, the
2825 * message string is the default for the given error_code.
2826*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002827static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002828expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2829 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002830{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002831 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002832 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002833
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002834 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002835 message ? message : EXPAT(ErrorString)(error_code),
2836 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002837 if (errmsg == NULL)
2838 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002839
Eli Bendersky532d03e2013-08-10 08:00:39 -07002840 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002841 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002842 if (!error)
2843 return;
2844
Eli Bendersky5b77d812012-03-16 08:20:05 +02002845 /* Add code and position attributes */
2846 code = PyLong_FromLong((long)error_code);
2847 if (!code) {
2848 Py_DECREF(error);
2849 return;
2850 }
2851 if (PyObject_SetAttrString(error, "code", code) == -1) {
2852 Py_DECREF(error);
2853 Py_DECREF(code);
2854 return;
2855 }
2856 Py_DECREF(code);
2857
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002858 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002859 if (!position) {
2860 Py_DECREF(error);
2861 return;
2862 }
2863 if (PyObject_SetAttrString(error, "position", position) == -1) {
2864 Py_DECREF(error);
2865 Py_DECREF(position);
2866 return;
2867 }
2868 Py_DECREF(position);
2869
Eli Bendersky532d03e2013-08-10 08:00:39 -07002870 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002871 Py_DECREF(error);
2872}
2873
/* -------------------------------------------------------------------- */
/* handlers */
2876
2877static void
2878expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2879 int data_len)
2880{
2881 PyObject* key;
2882 PyObject* value;
2883 PyObject* res;
2884
2885 if (data_len < 2 || data_in[0] != '&')
2886 return;
2887
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002888 if (PyErr_Occurred())
2889 return;
2890
Neal Norwitz0269b912007-08-08 06:56:02 +00002891 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892 if (!key)
2893 return;
2894
2895 value = PyDict_GetItem(self->entity, key);
2896
2897 if (value) {
2898 if (TreeBuilder_CheckExact(self->target))
2899 res = treebuilder_handle_data(
2900 (TreeBuilderObject*) self->target, value
2901 );
2902 else if (self->handle_data)
2903 res = PyObject_CallFunction(self->handle_data, "O", value);
2904 else
2905 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002906 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002907 } else if (!PyErr_Occurred()) {
2908 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002909 char message[128] = "undefined entity ";
2910 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002911 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002912 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002914 EXPAT(GetErrorColumnNumber)(self->parser),
2915 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002916 );
2917 }
2918
2919 Py_DECREF(key);
2920}
2921
2922static void
2923expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2924 const XML_Char **attrib_in)
2925{
2926 PyObject* res;
2927 PyObject* tag;
2928 PyObject* attrib;
2929 int ok;
2930
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002931 if (PyErr_Occurred())
2932 return;
2933
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934 /* tag name */
2935 tag = makeuniversal(self, tag_in);
2936 if (!tag)
2937 return; /* parser will look for errors */
2938
2939 /* attributes */
2940 if (attrib_in[0]) {
2941 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002942 if (!attrib) {
2943 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002945 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 while (attrib_in[0] && attrib_in[1]) {
2947 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002948 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 if (!key || !value) {
2950 Py_XDECREF(value);
2951 Py_XDECREF(key);
2952 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002953 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002954 return;
2955 }
2956 ok = PyDict_SetItem(attrib, key, value);
2957 Py_DECREF(value);
2958 Py_DECREF(key);
2959 if (ok < 0) {
2960 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002961 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 return;
2963 }
2964 attrib_in += 2;
2965 }
2966 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002967 Py_INCREF(Py_None);
2968 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002969 }
2970
2971 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 /* shortcut */
2973 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2974 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002975 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002976 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002977 if (attrib == Py_None) {
2978 Py_DECREF(attrib);
2979 attrib = PyDict_New();
2980 if (!attrib) {
2981 Py_DECREF(tag);
2982 return;
2983 }
2984 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002986 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 res = NULL;
2988
2989 Py_DECREF(tag);
2990 Py_DECREF(attrib);
2991
2992 Py_XDECREF(res);
2993}
2994
2995static void
2996expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2997 int data_len)
2998{
2999 PyObject* data;
3000 PyObject* res;
3001
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003002 if (PyErr_Occurred())
3003 return;
3004
Neal Norwitz0269b912007-08-08 06:56:02 +00003005 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003006 if (!data)
3007 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008
3009 if (TreeBuilder_CheckExact(self->target))
3010 /* shortcut */
3011 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3012 else if (self->handle_data)
3013 res = PyObject_CallFunction(self->handle_data, "O", data);
3014 else
3015 res = NULL;
3016
3017 Py_DECREF(data);
3018
3019 Py_XDECREF(res);
3020}
3021
3022static void
3023expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3024{
3025 PyObject* tag;
3026 PyObject* res = NULL;
3027
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003028 if (PyErr_Occurred())
3029 return;
3030
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031 if (TreeBuilder_CheckExact(self->target))
3032 /* shortcut */
3033 /* the standard tree builder doesn't look at the end tag */
3034 res = treebuilder_handle_end(
3035 (TreeBuilderObject*) self->target, Py_None
3036 );
3037 else if (self->handle_end) {
3038 tag = makeuniversal(self, tag_in);
3039 if (tag) {
3040 res = PyObject_CallFunction(self->handle_end, "O", tag);
3041 Py_DECREF(tag);
3042 }
3043 }
3044
3045 Py_XDECREF(res);
3046}
3047
3048static void
3049expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3050 const XML_Char *uri)
3051{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003052 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3053 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003054
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003055 if (PyErr_Occurred())
3056 return;
3057
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003058 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003059 return;
3060
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003061 if (!uri)
3062 uri = "";
3063 if (!prefix)
3064 prefix = "";
3065
3066 parcel = Py_BuildValue("ss", prefix, uri);
3067 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003068 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003069 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3070 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003071}
3072
3073static void
3074expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3075{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003076 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3077
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003078 if (PyErr_Occurred())
3079 return;
3080
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003081 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003082 return;
3083
3084 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085}
3086
3087static void
3088expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3089{
3090 PyObject* comment;
3091 PyObject* res;
3092
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003093 if (PyErr_Occurred())
3094 return;
3095
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003097 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098 if (comment) {
3099 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3100 Py_XDECREF(res);
3101 Py_DECREF(comment);
3102 }
3103 }
3104}
3105
Eli Bendersky45839902013-01-13 05:14:47 -08003106static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003107expat_start_doctype_handler(XMLParserObject *self,
3108 const XML_Char *doctype_name,
3109 const XML_Char *sysid,
3110 const XML_Char *pubid,
3111 int has_internal_subset)
3112{
3113 PyObject *self_pyobj = (PyObject *)self;
3114 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3115 PyObject *parser_doctype = NULL;
3116 PyObject *res = NULL;
3117
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003118 if (PyErr_Occurred())
3119 return;
3120
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003121 doctype_name_obj = makeuniversal(self, doctype_name);
3122 if (!doctype_name_obj)
3123 return;
3124
3125 if (sysid) {
3126 sysid_obj = makeuniversal(self, sysid);
3127 if (!sysid_obj) {
3128 Py_DECREF(doctype_name_obj);
3129 return;
3130 }
3131 } else {
3132 Py_INCREF(Py_None);
3133 sysid_obj = Py_None;
3134 }
3135
3136 if (pubid) {
3137 pubid_obj = makeuniversal(self, pubid);
3138 if (!pubid_obj) {
3139 Py_DECREF(doctype_name_obj);
3140 Py_DECREF(sysid_obj);
3141 return;
3142 }
3143 } else {
3144 Py_INCREF(Py_None);
3145 pubid_obj = Py_None;
3146 }
3147
3148 /* If the target has a handler for doctype, call it. */
3149 if (self->handle_doctype) {
3150 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3151 doctype_name_obj, pubid_obj, sysid_obj);
3152 Py_CLEAR(res);
3153 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003154 else {
3155 /* Now see if the parser itself has a doctype method. If yes and it's
3156 * a custom method, call it but warn about deprecation. If it's only
3157 * the vanilla XMLParser method, do nothing.
3158 */
3159 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3160 if (parser_doctype &&
3161 !(PyCFunction_Check(parser_doctype) &&
3162 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3163 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003164 (PyCFunction) _elementtree_XMLParser_doctype)) {
3165 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3166 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003167 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003168 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003169 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003170 res = PyObject_CallFunction(parser_doctype, "OOO",
3171 doctype_name_obj, pubid_obj, sysid_obj);
3172 Py_CLEAR(res);
3173 }
3174 }
3175
3176clear:
3177 Py_XDECREF(parser_doctype);
3178 Py_DECREF(doctype_name_obj);
3179 Py_DECREF(pubid_obj);
3180 Py_DECREF(sysid_obj);
3181}
3182
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003183static void
3184expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3185 const XML_Char* data_in)
3186{
3187 PyObject* target;
3188 PyObject* data;
3189 PyObject* res;
3190
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003191 if (PyErr_Occurred())
3192 return;
3193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003194 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003195 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3196 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003197 if (target && data) {
3198 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3199 Py_XDECREF(res);
3200 Py_DECREF(data);
3201 Py_DECREF(target);
3202 } else {
3203 Py_XDECREF(data);
3204 Py_XDECREF(target);
3205 }
3206 }
3207}
3208
/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003210
Eli Bendersky52467b12012-06-01 07:13:08 +03003211static PyObject *
3212xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003213{
Eli Bendersky52467b12012-06-01 07:13:08 +03003214 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3215 if (self) {
3216 self->parser = NULL;
3217 self->target = self->entity = self->names = NULL;
3218 self->handle_start = self->handle_data = self->handle_end = NULL;
3219 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003220 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 return (PyObject *)self;
3223}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003224
Serhiy Storchakacb985562015-05-04 15:32:48 +03003225/*[clinic input]
3226_elementtree.XMLParser.__init__
3227
3228 html: object = NULL
3229 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003230 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003231
3232[clinic start generated code]*/
3233
Eli Bendersky52467b12012-06-01 07:13:08 +03003234static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003235_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3236 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003237/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003238{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003239 self->entity = PyDict_New();
3240 if (!self->entity)
3241 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242
Serhiy Storchakacb985562015-05-04 15:32:48 +03003243 self->names = PyDict_New();
3244 if (!self->names) {
3245 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003246 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003248
Serhiy Storchakacb985562015-05-04 15:32:48 +03003249 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3250 if (!self->parser) {
3251 Py_CLEAR(self->entity);
3252 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003254 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 }
3256
Eli Bendersky52467b12012-06-01 07:13:08 +03003257 if (target) {
3258 Py_INCREF(target);
3259 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003260 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003262 Py_CLEAR(self->entity);
3263 Py_CLEAR(self->names);
3264 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003265 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003266 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003267 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003268 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270 self->handle_start = PyObject_GetAttrString(target, "start");
3271 self->handle_data = PyObject_GetAttrString(target, "data");
3272 self->handle_end = PyObject_GetAttrString(target, "end");
3273 self->handle_comment = PyObject_GetAttrString(target, "comment");
3274 self->handle_pi = PyObject_GetAttrString(target, "pi");
3275 self->handle_close = PyObject_GetAttrString(target, "close");
3276 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277
3278 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003279
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003281 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003283 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 (XML_StartElementHandler) expat_start_handler,
3285 (XML_EndElementHandler) expat_end_handler
3286 );
3287 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 (XML_DefaultHandler) expat_default_handler
3290 );
3291 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003292 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003293 (XML_CharacterDataHandler) expat_data_handler
3294 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003295 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 (XML_CommentHandler) expat_comment_handler
3299 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003300 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 (XML_ProcessingInstructionHandler) expat_pi_handler
3304 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003305 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003306 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003307 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3308 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003310 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003311 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003312 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003313
Eli Bendersky52467b12012-06-01 07:13:08 +03003314 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315}
3316
Eli Bendersky52467b12012-06-01 07:13:08 +03003317static int
3318xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3319{
3320 Py_VISIT(self->handle_close);
3321 Py_VISIT(self->handle_pi);
3322 Py_VISIT(self->handle_comment);
3323 Py_VISIT(self->handle_end);
3324 Py_VISIT(self->handle_data);
3325 Py_VISIT(self->handle_start);
3326
3327 Py_VISIT(self->target);
3328 Py_VISIT(self->entity);
3329 Py_VISIT(self->names);
3330
3331 return 0;
3332}
3333
3334static int
3335xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336{
3337 EXPAT(ParserFree)(self->parser);
3338
Antoine Pitrouc1948842012-10-01 23:40:37 +02003339 Py_CLEAR(self->handle_close);
3340 Py_CLEAR(self->handle_pi);
3341 Py_CLEAR(self->handle_comment);
3342 Py_CLEAR(self->handle_end);
3343 Py_CLEAR(self->handle_data);
3344 Py_CLEAR(self->handle_start);
3345 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346
Antoine Pitrouc1948842012-10-01 23:40:37 +02003347 Py_CLEAR(self->target);
3348 Py_CLEAR(self->entity);
3349 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350
Eli Bendersky52467b12012-06-01 07:13:08 +03003351 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352}
3353
Eli Bendersky52467b12012-06-01 07:13:08 +03003354static void
3355xmlparser_dealloc(XMLParserObject* self)
3356{
3357 PyObject_GC_UnTrack(self);
3358 xmlparser_gc_clear(self);
3359 Py_TYPE(self)->tp_free((PyObject *)self);
3360}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003361
3362LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003363expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364{
3365 int ok;
3366
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003367 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3369
3370 if (PyErr_Occurred())
3371 return NULL;
3372
3373 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003374 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003375 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003376 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003377 EXPAT(GetErrorColumnNumber)(self->parser),
3378 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379 );
3380 return NULL;
3381 }
3382
3383 Py_RETURN_NONE;
3384}
3385
Serhiy Storchakacb985562015-05-04 15:32:48 +03003386/*[clinic input]
3387_elementtree.XMLParser.close
3388
3389[clinic start generated code]*/
3390
3391static PyObject *
3392_elementtree_XMLParser_close_impl(XMLParserObject *self)
3393/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394{
3395 /* end feeding data to parser */
3396
3397 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003399 if (!res)
3400 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003401
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003402 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403 Py_DECREF(res);
3404 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003405 }
3406 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003407 Py_DECREF(res);
Victor Stinner3466bde2016-09-05 18:16:01 -07003408 return _PyObject_CallNoArg(self->handle_close);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003409 }
3410 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003411 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003412 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413}
3414
Serhiy Storchakacb985562015-05-04 15:32:48 +03003415/*[clinic input]
3416_elementtree.XMLParser.feed
3417
3418 data: object
3419 /
3420
3421[clinic start generated code]*/
3422
3423static PyObject *
3424_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3425/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003426{
3427 /* feed data to parser */
3428
Serhiy Storchakacb985562015-05-04 15:32:48 +03003429 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003430 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003431 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3432 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003433 return NULL;
3434 if (data_len > INT_MAX) {
3435 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3436 return NULL;
3437 }
3438 /* Explicitly set UTF-8 encoding. Return code ignored. */
3439 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003440 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003441 }
3442 else {
3443 Py_buffer view;
3444 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003445 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003446 return NULL;
3447 if (view.len > INT_MAX) {
3448 PyBuffer_Release(&view);
3449 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3450 return NULL;
3451 }
3452 res = expat_parse(self, view.buf, (int)view.len, 0);
3453 PyBuffer_Release(&view);
3454 return res;
3455 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456}
3457
Serhiy Storchakacb985562015-05-04 15:32:48 +03003458/*[clinic input]
3459_elementtree.XMLParser._parse_whole
3460
3461 file: object
3462 /
3463
3464[clinic start generated code]*/
3465
3466static PyObject *
3467_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3468/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469{
Eli Benderskya3699232013-05-19 18:47:23 -07003470 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471 PyObject* reader;
3472 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003473 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474 PyObject* res;
3475
Serhiy Storchakacb985562015-05-04 15:32:48 +03003476 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003477 if (!reader)
3478 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003479
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003480 /* read from open file object */
3481 for (;;) {
3482
3483 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3484
3485 if (!buffer) {
3486 /* read failed (e.g. due to KeyboardInterrupt) */
3487 Py_DECREF(reader);
3488 return NULL;
3489 }
3490
Eli Benderskyf996e772012-03-16 05:53:30 +02003491 if (PyUnicode_CheckExact(buffer)) {
3492 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003493 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003494 Py_DECREF(buffer);
3495 break;
3496 }
3497 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003498 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003499 if (!temp) {
3500 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003501 Py_DECREF(reader);
3502 return NULL;
3503 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003504 buffer = temp;
3505 }
3506 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003507 Py_DECREF(buffer);
3508 break;
3509 }
3510
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003511 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3512 Py_DECREF(buffer);
3513 Py_DECREF(reader);
3514 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3515 return NULL;
3516 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003518 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003519 );
3520
3521 Py_DECREF(buffer);
3522
3523 if (!res) {
3524 Py_DECREF(reader);
3525 return NULL;
3526 }
3527 Py_DECREF(res);
3528
3529 }
3530
3531 Py_DECREF(reader);
3532
3533 res = expat_parse(self, "", 0, 1);
3534
3535 if (res && TreeBuilder_CheckExact(self->target)) {
3536 Py_DECREF(res);
3537 return treebuilder_done((TreeBuilderObject*) self->target);
3538 }
3539
3540 return res;
3541}
3542
Serhiy Storchakacb985562015-05-04 15:32:48 +03003543/*[clinic input]
3544_elementtree.XMLParser.doctype
3545
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003546 name: object
3547 pubid: object
3548 system: object
3549 /
3550
Serhiy Storchakacb985562015-05-04 15:32:48 +03003551[clinic start generated code]*/
3552
3553static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003554_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3555 PyObject *pubid, PyObject *system)
3556/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003557{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003558 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3559 "This method of XMLParser is deprecated. Define"
3560 " doctype() method on the TreeBuilder target.",
3561 1) < 0) {
3562 return NULL;
3563 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003564 Py_RETURN_NONE;
3565}
3566
Serhiy Storchakacb985562015-05-04 15:32:48 +03003567/*[clinic input]
3568_elementtree.XMLParser._setevents
3569
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003570 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003571 events_to_report: object = None
3572 /
3573
3574[clinic start generated code]*/
3575
3576static PyObject *
3577_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3578 PyObject *events_queue,
3579 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003580/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003581{
3582 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003583 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003584 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003585 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003586
3587 if (!TreeBuilder_CheckExact(self->target)) {
3588 PyErr_SetString(
3589 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003590 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003591 "targets"
3592 );
3593 return NULL;
3594 }
3595
3596 target = (TreeBuilderObject*) self->target;
3597
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003598 events_append = PyObject_GetAttrString(events_queue, "append");
3599 if (events_append == NULL)
3600 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003601 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003602
3603 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003604 Py_CLEAR(target->start_event_obj);
3605 Py_CLEAR(target->end_event_obj);
3606 Py_CLEAR(target->start_ns_event_obj);
3607 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003609 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003611 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003612 Py_RETURN_NONE;
3613 }
3614
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003615 if (!(events_seq = PySequence_Fast(events_to_report,
3616 "events must be a sequence"))) {
3617 return NULL;
3618 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003620 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003621 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3622 char *event_name = NULL;
3623 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003624 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003625 } else if (PyBytes_Check(event_name_obj)) {
3626 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003627 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003628 if (event_name == NULL) {
3629 Py_DECREF(events_seq);
3630 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3631 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003632 }
3633
3634 Py_INCREF(event_name_obj);
3635 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003636 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003637 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003638 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003639 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003640 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641 EXPAT(SetNamespaceDeclHandler)(
3642 self->parser,
3643 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3644 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3645 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003646 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003647 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 EXPAT(SetNamespaceDeclHandler)(
3649 self->parser,
3650 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3651 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3652 );
3653 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003654 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003655 Py_DECREF(events_seq);
3656 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657 return NULL;
3658 }
3659 }
3660
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003661 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003663}
3664
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003665static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003666xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003668 if (PyUnicode_Check(nameobj)) {
3669 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003670 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003671 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003672 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003673 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003674 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003675 return PyUnicode_FromFormat(
3676 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003678 }
3679 else
3680 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681
Alexander Belopolskye239d232010-12-08 23:31:48 +00003682 Py_INCREF(res);
3683 return res;
3684 }
3685 generic:
3686 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687}
3688
Serhiy Storchakacb985562015-05-04 15:32:48 +03003689#include "clinic/_elementtree.c.h"
3690
3691static PyMethodDef element_methods[] = {
3692
3693 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3694
3695 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3696 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3697
3698 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3699 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3700 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3701
3702 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3703 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3704 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3705 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3706
3707 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3708 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3709 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3710
Victor Stinner84d8baa2016-09-29 22:12:35 +02003711 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_FASTCALL, _elementtree_Element_iter__doc__},
Serhiy Storchakacb985562015-05-04 15:32:48 +03003712 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3713
3714 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3715 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3716
3717 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3718
3719 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3720 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3721 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3722 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3723 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3724
3725 {NULL, NULL}
3726};
3727
3728static PyMappingMethods element_as_mapping = {
3729 (lenfunc) element_length,
3730 (binaryfunc) element_subscr,
3731 (objobjargproc) element_ass_subscr,
3732};
3733
Serhiy Storchakadde08152015-11-25 15:28:13 +02003734static PyGetSetDef element_getsetlist[] = {
3735 {"tag",
3736 (getter)element_tag_getter,
3737 (setter)element_tag_setter,
3738 "A string identifying what kind of data this element represents"},
3739 {"text",
3740 (getter)element_text_getter,
3741 (setter)element_text_setter,
3742 "A string of text directly after the start tag, or None"},
3743 {"tail",
3744 (getter)element_tail_getter,
3745 (setter)element_tail_setter,
3746 "A string of text directly after the end tag, or None"},
3747 {"attrib",
3748 (getter)element_attrib_getter,
3749 (setter)element_attrib_setter,
3750 "A dictionary containing the element's attributes"},
3751 {NULL},
3752};
3753
Serhiy Storchakacb985562015-05-04 15:32:48 +03003754static PyTypeObject Element_Type = {
3755 PyVarObject_HEAD_INIT(NULL, 0)
3756 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3757 /* methods */
3758 (destructor)element_dealloc, /* tp_dealloc */
3759 0, /* tp_print */
3760 0, /* tp_getattr */
3761 0, /* tp_setattr */
3762 0, /* tp_reserved */
3763 (reprfunc)element_repr, /* tp_repr */
3764 0, /* tp_as_number */
3765 &element_as_sequence, /* tp_as_sequence */
3766 &element_as_mapping, /* tp_as_mapping */
3767 0, /* tp_hash */
3768 0, /* tp_call */
3769 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003770 PyObject_GenericGetAttr, /* tp_getattro */
3771 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003772 0, /* tp_as_buffer */
3773 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3774 /* tp_flags */
3775 0, /* tp_doc */
3776 (traverseproc)element_gc_traverse, /* tp_traverse */
3777 (inquiry)element_gc_clear, /* tp_clear */
3778 0, /* tp_richcompare */
3779 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3780 0, /* tp_iter */
3781 0, /* tp_iternext */
3782 element_methods, /* tp_methods */
3783 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003784 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003785 0, /* tp_base */
3786 0, /* tp_dict */
3787 0, /* tp_descr_get */
3788 0, /* tp_descr_set */
3789 0, /* tp_dictoffset */
3790 (initproc)element_init, /* tp_init */
3791 PyType_GenericAlloc, /* tp_alloc */
3792 element_new, /* tp_new */
3793 0, /* tp_free */
3794};
3795
3796static PyMethodDef treebuilder_methods[] = {
3797 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3798 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3799 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3800 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3801 {NULL, NULL}
3802};
3803
3804static PyTypeObject TreeBuilder_Type = {
3805 PyVarObject_HEAD_INIT(NULL, 0)
3806 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3807 /* methods */
3808 (destructor)treebuilder_dealloc, /* tp_dealloc */
3809 0, /* tp_print */
3810 0, /* tp_getattr */
3811 0, /* tp_setattr */
3812 0, /* tp_reserved */
3813 0, /* tp_repr */
3814 0, /* tp_as_number */
3815 0, /* tp_as_sequence */
3816 0, /* tp_as_mapping */
3817 0, /* tp_hash */
3818 0, /* tp_call */
3819 0, /* tp_str */
3820 0, /* tp_getattro */
3821 0, /* tp_setattro */
3822 0, /* tp_as_buffer */
3823 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3824 /* tp_flags */
3825 0, /* tp_doc */
3826 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3827 (inquiry)treebuilder_gc_clear, /* tp_clear */
3828 0, /* tp_richcompare */
3829 0, /* tp_weaklistoffset */
3830 0, /* tp_iter */
3831 0, /* tp_iternext */
3832 treebuilder_methods, /* tp_methods */
3833 0, /* tp_members */
3834 0, /* tp_getset */
3835 0, /* tp_base */
3836 0, /* tp_dict */
3837 0, /* tp_descr_get */
3838 0, /* tp_descr_set */
3839 0, /* tp_dictoffset */
3840 _elementtree_TreeBuilder___init__, /* tp_init */
3841 PyType_GenericAlloc, /* tp_alloc */
3842 treebuilder_new, /* tp_new */
3843 0, /* tp_free */
3844};
3845
3846static PyMethodDef xmlparser_methods[] = {
3847 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3848 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3849 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3850 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3851 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3852 {NULL, NULL}
3853};
3854
Neal Norwitz227b5332006-03-22 09:28:35 +00003855static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003856 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003857 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003858 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003859 (destructor)xmlparser_dealloc, /* tp_dealloc */
3860 0, /* tp_print */
3861 0, /* tp_getattr */
3862 0, /* tp_setattr */
3863 0, /* tp_reserved */
3864 0, /* tp_repr */
3865 0, /* tp_as_number */
3866 0, /* tp_as_sequence */
3867 0, /* tp_as_mapping */
3868 0, /* tp_hash */
3869 0, /* tp_call */
3870 0, /* tp_str */
3871 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3872 0, /* tp_setattro */
3873 0, /* tp_as_buffer */
3874 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3875 /* tp_flags */
3876 0, /* tp_doc */
3877 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3878 (inquiry)xmlparser_gc_clear, /* tp_clear */
3879 0, /* tp_richcompare */
3880 0, /* tp_weaklistoffset */
3881 0, /* tp_iter */
3882 0, /* tp_iternext */
3883 xmlparser_methods, /* tp_methods */
3884 0, /* tp_members */
3885 0, /* tp_getset */
3886 0, /* tp_base */
3887 0, /* tp_dict */
3888 0, /* tp_descr_get */
3889 0, /* tp_descr_set */
3890 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003891 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003892 PyType_GenericAlloc, /* tp_alloc */
3893 xmlparser_new, /* tp_new */
3894 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003895};
3896
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003897/* ==================================================================== */
3898/* python module interface */
3899
3900static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003901 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003902 {NULL, NULL}
3903};
3904
Martin v. Löwis1a214512008-06-11 05:26:20 +00003905
Eli Bendersky532d03e2013-08-10 08:00:39 -07003906static struct PyModuleDef elementtreemodule = {
3907 PyModuleDef_HEAD_INIT,
3908 "_elementtree",
3909 NULL,
3910 sizeof(elementtreestate),
3911 _functions,
3912 NULL,
3913 elementtree_traverse,
3914 elementtree_clear,
3915 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003916};
3917
Neal Norwitzf6657e62006-12-28 04:47:50 +00003918PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003919PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003920{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003921 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003922 elementtreestate *st;
3923
3924 m = PyState_FindModule(&elementtreemodule);
3925 if (m) {
3926 Py_INCREF(m);
3927 return m;
3928 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003929
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003930 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003931 if (PyType_Ready(&ElementIter_Type) < 0)
3932 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003933 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003934 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003935 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003936 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003937 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003938 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003939
Eli Bendersky532d03e2013-08-10 08:00:39 -07003940 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003941 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003942 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003943 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003944
Eli Bendersky828efde2012-04-05 05:40:58 +03003945 if (!(temp = PyImport_ImportModule("copy")))
3946 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003947 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003948 Py_XDECREF(temp);
3949
Eli Bendersky532d03e2013-08-10 08:00:39 -07003950 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003951 return NULL;
3952
Eli Bendersky20d41742012-06-01 09:48:37 +03003953 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003954 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3955 if (expat_capi) {
3956 /* check that it's usable */
3957 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003958 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003959 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3960 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003961 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003962 PyErr_SetString(PyExc_ImportError,
3963 "pyexpat version is incompatible");
3964 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003965 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003966 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003967 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003968 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003969
Eli Bendersky532d03e2013-08-10 08:00:39 -07003970 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003971 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003972 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003973 Py_INCREF(st->parseerror_obj);
3974 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003975
Eli Bendersky092af1f2012-03-04 07:14:03 +02003976 Py_INCREF((PyObject *)&Element_Type);
3977 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3978
Eli Bendersky58d548d2012-05-29 15:45:16 +03003979 Py_INCREF((PyObject *)&TreeBuilder_Type);
3980 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3981
Eli Bendersky52467b12012-06-01 07:13:08 +03003982 Py_INCREF((PyObject *)&XMLParser_Type);
3983 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003984
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003985 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003986}