blob: b32f2ad2835ca9cfca3805b37978fde0eceaa818 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in the element's extra block;
   an element with at most this many children needs no separate buffer. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the condition below switched on,
   ALLOC and RELEASE keep a running byte count and print it together with
   the comment; in the default build both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* Linkage for file-local helpers: MSVC gets an inline __fastcall function,
   every other compiler a plain static one. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The lowest bit of the text and tail pointers carries a 'join' flag, so
   those fields must never be used as object pointers directly: wrap every
   such use in JOIN_OBJ.  The comments in the ElementObject definition
   explain what the flag means.

   JOIN_OBJ(p)        the object pointer with the flag bit masked off
   JOIN_GET(p)        the flag bit (0 or 1)
   JOIN_SET(p, flag)  the object pointer of p with `flag` or'ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True only when the type of `op` is exactly Element_Type. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300400 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
511 PyObject* res = self->text;
512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
516 res = list_join(res);
517 if (!res)
518 return NULL;
519 self->text = res;
520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
531 PyObject* res = self->tail;
532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
536 res = list_join(res);
537 if (!res)
538 return NULL;
539 self->tail = res;
540 }
541 }
542
543 return res;
544}
545
546static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300547subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548{
549 PyObject* elem;
550
551 ElementObject* parent;
552 PyObject* tag;
553 PyObject* attrib = NULL;
554 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
555 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800556 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (attrib) {
561 /* attrib passed as positional arg */
562 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 if (!attrib)
564 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300565 if (kwds) {
566 if (PyDict_Update(attrib, kwds) < 0) {
567 return NULL;
568 }
569 }
570 } else if (kwds) {
571 /* have keyword args */
572 attrib = get_attrib_from_keywords(kwds);
573 if (!attrib)
574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300576 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 Py_INCREF(Py_None);
578 attrib = Py_None;
579 }
580
Eli Bendersky092af1f2012-03-04 07:14:03 +0200581 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200583 if (elem == NULL)
584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000586 if (element_add_subelement(parent, elem) < 0) {
587 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590
591 return elem;
592}
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594static int
595element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
596{
597 Py_VISIT(self->tag);
598 Py_VISIT(JOIN_OBJ(self->text));
599 Py_VISIT(JOIN_OBJ(self->tail));
600
601 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200602 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300603 Py_VISIT(self->extra->attrib);
604
605 for (i = 0; i < self->extra->length; ++i)
606 Py_VISIT(self->extra->children[i]);
607 }
608 return 0;
609}
610
611static int
612element_gc_clear(ElementObject *self)
613{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700615 _clear_joined_ptr(&self->text);
616 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300617
618 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300619 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 return 0;
623}
624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625static void
626element_dealloc(ElementObject* self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300629
630 if (self->weakreflist != NULL)
631 PyObject_ClearWeakRefs((PyObject *) self);
632
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 /* element_gc_clear clears all references and deallocates extra
634 */
635 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000636
637 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200638 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639}
640
641/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
Serhiy Storchakacb985562015-05-04 15:32:48 +0300643/*[clinic input]
644_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000645
Serhiy Storchakacb985562015-05-04 15:32:48 +0300646 subelement: object(subclass_of='&Element_Type')
647 /
648
649[clinic start generated code]*/
650
651static PyObject *
652_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
653/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
654{
655 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656 return NULL;
657
658 Py_RETURN_NONE;
659}
660
Serhiy Storchakacb985562015-05-04 15:32:48 +0300661/*[clinic input]
662_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
Serhiy Storchakacb985562015-05-04 15:32:48 +0300664[clinic start generated code]*/
665
666static PyObject *
667_elementtree_Element_clear_impl(ElementObject *self)
668/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
669{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
672 Py_INCREF(Py_None);
673 Py_DECREF(JOIN_OBJ(self->text));
674 self->text = Py_None;
675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->tail));
678 self->tail = Py_None;
679
680 Py_RETURN_NONE;
681}
682
Serhiy Storchakacb985562015-05-04 15:32:48 +0300683/*[clinic input]
684_elementtree.Element.__copy__
685
686[clinic start generated code]*/
687
688static PyObject *
689_elementtree_Element___copy___impl(ElementObject *self)
690/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200692 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 ElementObject* element;
694
Eli Bendersky092af1f2012-03-04 07:14:03 +0200695 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800696 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 if (!element)
698 return NULL;
699
700 Py_DECREF(JOIN_OBJ(element->text));
701 element->text = self->text;
702 Py_INCREF(JOIN_OBJ(element->text));
703
704 Py_DECREF(JOIN_OBJ(element->tail));
705 element->tail = self->tail;
706 Py_INCREF(JOIN_OBJ(element->tail));
707
708 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000709 if (element_resize(element, self->extra->length) < 0) {
710 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 for (i = 0; i < self->extra->length; i++) {
715 Py_INCREF(self->extra->children[i]);
716 element->extra->children[i] = self->extra->children[i];
717 }
718
719 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720 }
721
722 return (PyObject*) element;
723}
724
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200725/* Helper for a deep copy. */
726LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
727
Serhiy Storchakacb985562015-05-04 15:32:48 +0300728/*[clinic input]
729_elementtree.Element.__deepcopy__
730
731 memo: object
732 /
733
734[clinic start generated code]*/
735
736static PyObject *
737_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
738/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200740 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741 ElementObject* element;
742 PyObject* tag;
743 PyObject* attrib;
744 PyObject* text;
745 PyObject* tail;
746 PyObject* id;
747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 tag = deepcopy(self->tag, memo);
749 if (!tag)
750 return NULL;
751
752 if (self->extra) {
753 attrib = deepcopy(self->extra->attrib, memo);
754 if (!attrib) {
755 Py_DECREF(tag);
756 return NULL;
757 }
758 } else {
759 Py_INCREF(Py_None);
760 attrib = Py_None;
761 }
762
Eli Bendersky092af1f2012-03-04 07:14:03 +0200763 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764
765 Py_DECREF(tag);
766 Py_DECREF(attrib);
767
768 if (!element)
769 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 text = deepcopy(JOIN_OBJ(self->text), memo);
772 if (!text)
773 goto error;
774 Py_DECREF(element->text);
775 element->text = JOIN_SET(text, JOIN_GET(self->text));
776
777 tail = deepcopy(JOIN_OBJ(self->tail), memo);
778 if (!tail)
779 goto error;
780 Py_DECREF(element->tail);
781 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
782
783 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 if (element_resize(element, self->extra->length) < 0)
785 goto error;
786
787 for (i = 0; i < self->extra->length; i++) {
788 PyObject* child = deepcopy(self->extra->children[i], memo);
789 if (!child) {
790 element->extra->length = i;
791 goto error;
792 }
793 element->extra->children[i] = child;
794 }
795
796 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 }
798
799 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200800 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000801 if (!id)
802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 i = PyDict_SetItem(memo, id, (PyObject*) element);
805
806 Py_DECREF(id);
807
808 if (i < 0)
809 goto error;
810
811 return (PyObject*) element;
812
813 error:
814 Py_DECREF(element);
815 return NULL;
816}
817
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200818LOCAL(PyObject *)
819deepcopy(PyObject *object, PyObject *memo)
820{
821 /* do a deep copy of the given object */
822 PyObject *args;
823 PyObject *result;
824 elementtreestate *st;
825
826 /* Fast paths */
827 if (object == Py_None || PyUnicode_CheckExact(object)) {
828 Py_INCREF(object);
829 return object;
830 }
831
832 if (Py_REFCNT(object) == 1) {
833 if (PyDict_CheckExact(object)) {
834 PyObject *key, *value;
835 Py_ssize_t pos = 0;
836 int simple = 1;
837 while (PyDict_Next(object, &pos, &key, &value)) {
838 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
839 simple = 0;
840 break;
841 }
842 }
843 if (simple)
844 return PyDict_Copy(object);
845 /* Fall through to general case */
846 }
847 else if (Element_CheckExact(object)) {
848 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
849 }
850 }
851
852 /* General case */
853 st = ET_STATE_GLOBAL;
854 if (!st->deepcopy_obj) {
855 PyErr_SetString(PyExc_RuntimeError,
856 "deepcopy helper not found");
857 return NULL;
858 }
859
860 args = PyTuple_Pack(2, object, memo);
861 if (!args)
862 return NULL;
863 result = PyObject_CallObject(st->deepcopy_obj, args);
864 Py_DECREF(args);
865 return result;
866}
867
868
Serhiy Storchakacb985562015-05-04 15:32:48 +0300869/*[clinic input]
870_elementtree.Element.__sizeof__ -> Py_ssize_t
871
872[clinic start generated code]*/
873
874static Py_ssize_t
875_elementtree_Element___sizeof___impl(ElementObject *self)
876/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200878 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879 if (self->extra) {
880 result += sizeof(ElementObjectExtra);
881 if (self->extra->children != self->extra->_children)
882 result += sizeof(PyObject*) * self->extra->allocated;
883 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200885}
886
Eli Bendersky698bdb22013-01-10 06:01:06 -0800887/* dict keys for getstate/setstate. */
888#define PICKLED_TAG "tag"
889#define PICKLED_CHILDREN "_children"
890#define PICKLED_ATTRIB "attrib"
891#define PICKLED_TAIL "tail"
892#define PICKLED_TEXT "text"
893
894/* __getstate__ returns a fabricated instance dict as in the pure-Python
895 * Element implementation, for interoperability/interchangeability. This
896 * makes the pure-Python implementation details an API, but (a) there aren't
897 * any unnecessary structures there; and (b) it buys compatibility with 3.2
898 * pickles. See issue #16076.
899 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300900/*[clinic input]
901_elementtree.Element.__getstate__
902
903[clinic start generated code]*/
904
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906_elementtree_Element___getstate___impl(ElementObject *self)
907/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200909 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910 PyObject *instancedict = NULL, *children;
911
912 /* Build a list of children. */
913 children = PyList_New(self->extra ? self->extra->length : 0);
914 if (!children)
915 return NULL;
916 for (i = 0; i < PyList_GET_SIZE(children); i++) {
917 PyObject *child = self->extra->children[i];
918 Py_INCREF(child);
919 PyList_SET_ITEM(children, i, child);
920 }
921
922 /* Construct the state object. */
923 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
924 if (noattrib)
925 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
926 PICKLED_TAG, self->tag,
927 PICKLED_CHILDREN, children,
928 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700929 PICKLED_TEXT, JOIN_OBJ(self->text),
930 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931 else
932 instancedict = Py_BuildValue("{sOsOsOsOsO}",
933 PICKLED_TAG, self->tag,
934 PICKLED_CHILDREN, children,
935 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700936 PICKLED_TEXT, JOIN_OBJ(self->text),
937 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800938 if (instancedict) {
939 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800941 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 else {
943 for (i = 0; i < PyList_GET_SIZE(children); i++)
944 Py_DECREF(PyList_GET_ITEM(children, i));
945 Py_DECREF(children);
946
947 return NULL;
948 }
949}
950
951static PyObject *
952element_setstate_from_attributes(ElementObject *self,
953 PyObject *tag,
954 PyObject *attrib,
955 PyObject *text,
956 PyObject *tail,
957 PyObject *children)
958{
959 Py_ssize_t i, nchildren;
960
961 if (!tag) {
962 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
963 return NULL;
964 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200966 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300967 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->text);
970 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 _clear_joined_ptr(&self->tail);
974 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
975 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 /* Handle ATTRIB and CHILDREN. */
978 if (!children && !attrib)
979 Py_RETURN_NONE;
980
981 /* Compute 'nchildren'. */
982 if (children) {
983 if (!PyList_Check(children)) {
984 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
985 return NULL;
986 }
987 nchildren = PyList_Size(children);
988 }
989 else {
990 nchildren = 0;
991 }
992
993 /* Allocate 'extra'. */
994 if (element_resize(self, nchildren)) {
995 return NULL;
996 }
997 assert(self->extra && self->extra->allocated >= nchildren);
998
999 /* Copy children */
1000 for (i = 0; i < nchildren; i++) {
1001 self->extra->children[i] = PyList_GET_ITEM(children, i);
1002 Py_INCREF(self->extra->children[i]);
1003 }
1004
1005 self->extra->length = nchildren;
1006 self->extra->allocated = nchildren;
1007
1008 /* Stash attrib. */
1009 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001011 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 }
1013
1014 Py_RETURN_NONE;
1015}
1016
1017/* __setstate__ for Element instance from the Python implementation.
1018 * 'state' should be the instance dict.
1019 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021static PyObject *
1022element_setstate_from_Python(ElementObject *self, PyObject *state)
1023{
1024 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1025 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1026 PyObject *args;
1027 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030 tag = attrib = text = tail = children = NULL;
1031 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034
1035 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1036 &attrib, &text, &tail, &children))
1037 retval = element_setstate_from_attributes(self, tag, attrib, text,
1038 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001039 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001040 retval = NULL;
1041
1042 Py_DECREF(args);
1043 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044}
1045
Serhiy Storchakacb985562015-05-04 15:32:48 +03001046/*[clinic input]
1047_elementtree.Element.__setstate__
1048
1049 state: object
1050 /
1051
1052[clinic start generated code]*/
1053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1056/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057{
1058 if (!PyDict_CheckExact(state)) {
1059 PyErr_Format(PyExc_TypeError,
1060 "Don't know how to unpickle \"%.200R\" as an Element",
1061 state);
1062 return NULL;
1063 }
1064 else
1065 return element_setstate_from_Python(self, state);
1066}
1067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068LOCAL(int)
1069checkpath(PyObject* tag)
1070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 Py_ssize_t i;
1072 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 /* check if a tag contains an xpath character */
1075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076#define PATHCHAR(ch) \
1077 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1081 void *data = PyUnicode_DATA(tag);
1082 unsigned int kind = PyUnicode_KIND(tag);
1083 for (i = 0; i < len; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 return 1;
1091 }
1092 return 0;
1093 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001094 if (PyBytes_Check(tag)) {
1095 char *p = PyBytes_AS_STRING(tag);
1096 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (p[i] == '{')
1098 check = 0;
1099 else if (p[i] == '}')
1100 check = 1;
1101 else if (check && PATHCHAR(p[i]))
1102 return 1;
1103 }
1104 return 0;
1105 }
1106
1107 return 1; /* unknown type; might be path expression */
1108}
1109
Serhiy Storchakacb985562015-05-04 15:32:48 +03001110/*[clinic input]
1111_elementtree.Element.extend
1112
1113 elements: object
1114 /
1115
1116[clinic start generated code]*/
1117
1118static PyObject *
1119_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1120/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121{
1122 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 if (!seq) {
1127 PyErr_Format(
1128 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 );
1131 return NULL;
1132 }
1133
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 Py_INCREF(element);
1137 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001138 PyErr_Format(
1139 PyExc_TypeError,
1140 "expected an Element, not \"%.200s\"",
1141 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001142 Py_DECREF(seq);
1143 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001144 return NULL;
1145 }
1146
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147 if (element_add_subelement(self, element) < 0) {
1148 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001149 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 return NULL;
1151 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001152 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001153 }
1154
1155 Py_DECREF(seq);
1156
1157 Py_RETURN_NONE;
1158}
1159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160/*[clinic input]
1161_elementtree.Element.find
1162
1163 path: object
1164 namespaces: object = None
1165
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1170 PyObject *namespaces)
1171/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001173 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001175
Serhiy Storchakacb985562015-05-04 15:32:48 +03001176 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001178 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001179 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001181 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182
1183 if (!self->extra)
1184 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001188 int rc;
1189 if (!Element_CheckExact(item))
1190 continue;
1191 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001192 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_DECREF(item);
1196 if (rc < 0)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 }
1199
1200 Py_RETURN_NONE;
1201}
1202
Serhiy Storchakacb985562015-05-04 15:32:48 +03001203/*[clinic input]
1204_elementtree.Element.findtext
1205
1206 path: object
1207 default: object = None
1208 namespaces: object = None
1209
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1214 PyObject *default_value,
1215 PyObject *namespaces)
1216/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001218 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001219 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001220 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001223 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001224 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225 );
1226
1227 if (!self->extra) {
1228 Py_INCREF(default_value);
1229 return default_value;
1230 }
1231
1232 for (i = 0; i < self->extra->length; i++) {
1233 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001234 int rc;
1235 if (!Element_CheckExact(item))
1236 continue;
1237 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001238 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001239 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001240 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 if (text == Py_None) {
1242 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001243 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001244 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001245 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001247 return text;
1248 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 Py_DECREF(item);
1250 if (rc < 0)
1251 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 }
1253
1254 Py_INCREF(default_value);
1255 return default_value;
1256}
1257
Serhiy Storchakacb985562015-05-04 15:32:48 +03001258/*[clinic input]
1259_elementtree.Element.findall
1260
1261 path: object
1262 namespaces: object = None
1263
1264[clinic start generated code]*/
1265
1266static PyObject *
1267_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1268 PyObject *namespaces)
1269/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001270{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001271 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001273 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001274 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001276 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001277 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001279 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001281 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282
1283 out = PyList_New(0);
1284 if (!out)
1285 return NULL;
1286
1287 if (!self->extra)
1288 return out;
1289
1290 for (i = 0; i < self->extra->length; i++) {
1291 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 int rc;
1293 if (!Element_CheckExact(item))
1294 continue;
1295 Py_INCREF(item);
1296 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1297 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1298 Py_DECREF(item);
1299 Py_DECREF(out);
1300 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
1304
1305 return out;
1306}
1307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308/*[clinic input]
1309_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001310
Serhiy Storchakacb985562015-05-04 15:32:48 +03001311 path: object
1312 namespaces: object = None
1313
1314[clinic start generated code]*/
1315
1316static PyObject *
1317_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1318 PyObject *namespaces)
1319/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1320{
1321 PyObject* tag = path;
1322 _Py_IDENTIFIER(iterfind);
1323 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001325 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001326 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.get
1331
1332 key: object
1333 default: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1339 PyObject *default_value)
1340/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
1342 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 if (!self->extra || self->extra->attrib == Py_None)
1345 value = default_value;
1346 else {
1347 value = PyDict_GetItem(self->extra->attrib, key);
1348 if (!value)
1349 value = default_value;
1350 }
1351
1352 Py_INCREF(value);
1353 return value;
1354}
1355
Serhiy Storchakacb985562015-05-04 15:32:48 +03001356/*[clinic input]
1357_elementtree.Element.getchildren
1358
1359[clinic start generated code]*/
1360
1361static PyObject *
1362_elementtree_Element_getchildren_impl(ElementObject *self)
1363/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001364{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001365 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366 PyObject* list;
1367
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001368 /* FIXME: report as deprecated? */
1369
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 if (!self->extra)
1371 return PyList_New(0);
1372
1373 list = PyList_New(self->extra->length);
1374 if (!list)
1375 return NULL;
1376
1377 for (i = 0; i < self->extra->length; i++) {
1378 PyObject* item = self->extra->children[i];
1379 Py_INCREF(item);
1380 PyList_SET_ITEM(list, i, item);
1381 }
1382
1383 return list;
1384}
1385
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001386
Eli Bendersky64d11e62012-06-15 07:42:50 +03001387static PyObject *
1388create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1389
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.iter
1393
1394 tag: object = None
1395
1396[clinic start generated code]*/
1397
Eli Bendersky64d11e62012-06-15 07:42:50 +03001398static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001399_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1400/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001401{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001402 if (PyUnicode_Check(tag)) {
1403 if (PyUnicode_READY(tag) < 0)
1404 return NULL;
1405 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1406 tag = Py_None;
1407 }
1408 else if (PyBytes_Check(tag)) {
1409 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1410 tag = Py_None;
1411 }
1412
Eli Bendersky64d11e62012-06-15 07:42:50 +03001413 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001414}
1415
1416
Serhiy Storchakacb985562015-05-04 15:32:48 +03001417/*[clinic input]
1418_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001419
Serhiy Storchakacb985562015-05-04 15:32:48 +03001420[clinic start generated code]*/
1421
1422static PyObject *
1423_elementtree_Element_itertext_impl(ElementObject *self)
1424/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1425{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001426 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427}
1428
Eli Bendersky64d11e62012-06-15 07:42:50 +03001429
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001431element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001433 ElementObject* self = (ElementObject*) self_;
1434
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001435 if (!self->extra || index < 0 || index >= self->extra->length) {
1436 PyErr_SetString(
1437 PyExc_IndexError,
1438 "child index out of range"
1439 );
1440 return NULL;
1441 }
1442
1443 Py_INCREF(self->extra->children[index]);
1444 return self->extra->children[index];
1445}
1446
Serhiy Storchakacb985562015-05-04 15:32:48 +03001447/*[clinic input]
1448_elementtree.Element.insert
1449
1450 index: Py_ssize_t
1451 subelement: object(subclass_of='&Element_Type')
1452 /
1453
1454[clinic start generated code]*/
1455
1456static PyObject *
1457_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1458 PyObject *subelement)
1459/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001461 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462
Victor Stinner5f0af232013-07-11 23:01:36 +02001463 if (!self->extra) {
1464 if (create_extra(self, NULL) < 0)
1465 return NULL;
1466 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001467
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001468 if (index < 0) {
1469 index += self->extra->length;
1470 if (index < 0)
1471 index = 0;
1472 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001473 if (index > self->extra->length)
1474 index = self->extra->length;
1475
1476 if (element_resize(self, 1) < 0)
1477 return NULL;
1478
1479 for (i = self->extra->length; i > index; i--)
1480 self->extra->children[i] = self->extra->children[i-1];
1481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482 Py_INCREF(subelement);
1483 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484
1485 self->extra->length++;
1486
1487 Py_RETURN_NONE;
1488}
1489
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490/*[clinic input]
1491_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492
Serhiy Storchakacb985562015-05-04 15:32:48 +03001493[clinic start generated code]*/
1494
1495static PyObject *
1496_elementtree_Element_items_impl(ElementObject *self)
1497/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1498{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 if (!self->extra || self->extra->attrib == Py_None)
1500 return PyList_New(0);
1501
1502 return PyDict_Items(self->extra->attrib);
1503}
1504
Serhiy Storchakacb985562015-05-04 15:32:48 +03001505/*[clinic input]
1506_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507
Serhiy Storchakacb985562015-05-04 15:32:48 +03001508[clinic start generated code]*/
1509
1510static PyObject *
1511_elementtree_Element_keys_impl(ElementObject *self)
1512/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1513{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514 if (!self->extra || self->extra->attrib == Py_None)
1515 return PyList_New(0);
1516
1517 return PyDict_Keys(self->extra->attrib);
1518}
1519
Martin v. Löwis18e16552006-02-15 17:27:45 +00001520static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521element_length(ElementObject* self)
1522{
1523 if (!self->extra)
1524 return 0;
1525
1526 return self->extra->length;
1527}
1528
Serhiy Storchakacb985562015-05-04 15:32:48 +03001529/*[clinic input]
1530_elementtree.Element.makeelement
1531
1532 tag: object
1533 attrib: object
1534 /
1535
1536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1540 PyObject *attrib)
1541/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542{
1543 PyObject* elem;
1544
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545 attrib = PyDict_Copy(attrib);
1546 if (!attrib)
1547 return NULL;
1548
Eli Bendersky092af1f2012-03-04 07:14:03 +02001549 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001550
1551 Py_DECREF(attrib);
1552
1553 return elem;
1554}
1555
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556/*[clinic input]
1557_elementtree.Element.remove
1558
1559 subelement: object(subclass_of='&Element_Type')
1560 /
1561
1562[clinic start generated code]*/
1563
1564static PyObject *
1565_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1566/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001568 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001569 int rc;
1570 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 if (!self->extra) {
1573 /* element has no children, so raise exception */
1574 PyErr_SetString(
1575 PyExc_ValueError,
1576 "list.remove(x): x not in list"
1577 );
1578 return NULL;
1579 }
1580
1581 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001582 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001584 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001585 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001587 if (rc < 0)
1588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 }
1590
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001591 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001592 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593 PyErr_SetString(
1594 PyExc_ValueError,
1595 "list.remove(x): x not in list"
1596 );
1597 return NULL;
1598 }
1599
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001600 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601
1602 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 for (; i < self->extra->length; i++)
1604 self->extra->children[i] = self->extra->children[i+1];
1605
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001606 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 Py_RETURN_NONE;
1608}
1609
1610static PyObject*
1611element_repr(ElementObject* self)
1612{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001613 int status;
1614
1615 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001616 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001617
1618 status = Py_ReprEnter((PyObject *)self);
1619 if (status == 0) {
1620 PyObject *res;
1621 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1622 Py_ReprLeave((PyObject *)self);
1623 return res;
1624 }
1625 if (status > 0)
1626 PyErr_Format(PyExc_RuntimeError,
1627 "reentrant call inside %s.__repr__",
1628 Py_TYPE(self)->tp_name);
1629 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001630}
1631
Serhiy Storchakacb985562015-05-04 15:32:48 +03001632/*[clinic input]
1633_elementtree.Element.set
1634
1635 key: object
1636 value: object
1637 /
1638
1639[clinic start generated code]*/
1640
1641static PyObject *
1642_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1643 PyObject *value)
1644/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001645{
1646 PyObject* attrib;
1647
Victor Stinner5f0af232013-07-11 23:01:36 +02001648 if (!self->extra) {
1649 if (create_extra(self, NULL) < 0)
1650 return NULL;
1651 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001652
1653 attrib = element_get_attrib(self);
1654 if (!attrib)
1655 return NULL;
1656
1657 if (PyDict_SetItem(attrib, key, value) < 0)
1658 return NULL;
1659
1660 Py_RETURN_NONE;
1661}
1662
1663static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001664element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001665{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001666 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001667 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001668 PyObject* old;
1669
1670 if (!self->extra || index < 0 || index >= self->extra->length) {
1671 PyErr_SetString(
1672 PyExc_IndexError,
1673 "child assignment index out of range");
1674 return -1;
1675 }
1676
1677 old = self->extra->children[index];
1678
1679 if (item) {
1680 Py_INCREF(item);
1681 self->extra->children[index] = item;
1682 } else {
1683 self->extra->length--;
1684 for (i = index; i < self->extra->length; i++)
1685 self->extra->children[i] = self->extra->children[i+1];
1686 }
1687
1688 Py_DECREF(old);
1689
1690 return 0;
1691}
1692
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001693static PyObject*
1694element_subscr(PyObject* self_, PyObject* item)
1695{
1696 ElementObject* self = (ElementObject*) self_;
1697
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001698 if (PyIndex_Check(item)) {
1699 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001700
1701 if (i == -1 && PyErr_Occurred()) {
1702 return NULL;
1703 }
1704 if (i < 0 && self->extra)
1705 i += self->extra->length;
1706 return element_getitem(self_, i);
1707 }
1708 else if (PySlice_Check(item)) {
1709 Py_ssize_t start, stop, step, slicelen, cur, i;
1710 PyObject* list;
1711
1712 if (!self->extra)
1713 return PyList_New(0);
1714
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001715 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001716 self->extra->length,
1717 &start, &stop, &step, &slicelen) < 0) {
1718 return NULL;
1719 }
1720
1721 if (slicelen <= 0)
1722 return PyList_New(0);
1723 else {
1724 list = PyList_New(slicelen);
1725 if (!list)
1726 return NULL;
1727
1728 for (cur = start, i = 0; i < slicelen;
1729 cur += step, i++) {
1730 PyObject* item = self->extra->children[cur];
1731 Py_INCREF(item);
1732 PyList_SET_ITEM(list, i, item);
1733 }
1734
1735 return list;
1736 }
1737 }
1738 else {
1739 PyErr_SetString(PyExc_TypeError,
1740 "element indices must be integers");
1741 return NULL;
1742 }
1743}
1744
1745static int
1746element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1747{
1748 ElementObject* self = (ElementObject*) self_;
1749
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001750 if (PyIndex_Check(item)) {
1751 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001752
1753 if (i == -1 && PyErr_Occurred()) {
1754 return -1;
1755 }
1756 if (i < 0 && self->extra)
1757 i += self->extra->length;
1758 return element_setitem(self_, i, value);
1759 }
1760 else if (PySlice_Check(item)) {
1761 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1762
1763 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001764 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001765
Victor Stinner5f0af232013-07-11 23:01:36 +02001766 if (!self->extra) {
1767 if (create_extra(self, NULL) < 0)
1768 return -1;
1769 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001770
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001771 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001772 self->extra->length,
1773 &start, &stop, &step, &slicelen) < 0) {
1774 return -1;
1775 }
1776
Eli Bendersky865756a2012-03-09 13:38:15 +02001777 if (value == NULL) {
1778 /* Delete slice */
1779 size_t cur;
1780 Py_ssize_t i;
1781
1782 if (slicelen <= 0)
1783 return 0;
1784
1785 /* Since we're deleting, the direction of the range doesn't matter,
1786 * so for simplicity make it always ascending.
1787 */
1788 if (step < 0) {
1789 stop = start + 1;
1790 start = stop + step * (slicelen - 1) - 1;
1791 step = -step;
1792 }
1793
1794 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1795
1796 /* recycle is a list that will contain all the children
1797 * scheduled for removal.
1798 */
1799 if (!(recycle = PyList_New(slicelen))) {
1800 PyErr_NoMemory();
1801 return -1;
1802 }
1803
1804 /* This loop walks over all the children that have to be deleted,
1805 * with cur pointing at them. num_moved is the amount of children
1806 * until the next deleted child that have to be "shifted down" to
1807 * occupy the deleted's places.
1808 * Note that in the ith iteration, shifting is done i+i places down
1809 * because i children were already removed.
1810 */
1811 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1812 /* Compute how many children have to be moved, clipping at the
1813 * list end.
1814 */
1815 Py_ssize_t num_moved = step - 1;
1816 if (cur + step >= (size_t)self->extra->length) {
1817 num_moved = self->extra->length - cur - 1;
1818 }
1819
1820 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1821
1822 memmove(
1823 self->extra->children + cur - i,
1824 self->extra->children + cur + 1,
1825 num_moved * sizeof(PyObject *));
1826 }
1827
1828 /* Leftover "tail" after the last removed child */
1829 cur = start + (size_t)slicelen * step;
1830 if (cur < (size_t)self->extra->length) {
1831 memmove(
1832 self->extra->children + cur - slicelen,
1833 self->extra->children + cur,
1834 (self->extra->length - cur) * sizeof(PyObject *));
1835 }
1836
1837 self->extra->length -= slicelen;
1838
1839 /* Discard the recycle list with all the deleted sub-elements */
1840 Py_XDECREF(recycle);
1841 return 0;
1842 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001843
1844 /* A new slice is actually being assigned */
1845 seq = PySequence_Fast(value, "");
1846 if (!seq) {
1847 PyErr_Format(
1848 PyExc_TypeError,
1849 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1850 );
1851 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001852 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001853 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001854
1855 if (step != 1 && newlen != slicelen)
1856 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001857 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001858 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 "attempt to assign sequence of size %zd "
1860 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001861 newlen, slicelen
1862 );
1863 return -1;
1864 }
1865
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001866 /* Resize before creating the recycle bin, to prevent refleaks. */
1867 if (newlen > slicelen) {
1868 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870 return -1;
1871 }
1872 }
1873
1874 if (slicelen > 0) {
1875 /* to avoid recursive calls to this method (via decref), move
1876 old items to the recycle bin here, and get rid of them when
1877 we're done modifying the element */
1878 recycle = PyList_New(slicelen);
1879 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001880 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881 return -1;
1882 }
1883 for (cur = start, i = 0; i < slicelen;
1884 cur += step, i++)
1885 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1886 }
1887
1888 if (newlen < slicelen) {
1889 /* delete slice */
1890 for (i = stop; i < self->extra->length; i++)
1891 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1892 } else if (newlen > slicelen) {
1893 /* insert slice */
1894 for (i = self->extra->length-1; i >= stop; i--)
1895 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1896 }
1897
1898 /* replace the slice */
1899 for (cur = start, i = 0; i < newlen;
1900 cur += step, i++) {
1901 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1902 Py_INCREF(element);
1903 self->extra->children[cur] = element;
1904 }
1905
1906 self->extra->length += newlen - slicelen;
1907
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001908 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001909
1910 /* discard the recycle bin, and everything in it */
1911 Py_XDECREF(recycle);
1912
1913 return 0;
1914 }
1915 else {
1916 PyErr_SetString(PyExc_TypeError,
1917 "element indices must be integers");
1918 return -1;
1919 }
1920}
1921
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001922static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001923element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001924{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001925 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001926 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001927 return res;
1928}
1929
Serhiy Storchakadde08152015-11-25 15:28:13 +02001930static PyObject*
1931element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001933 PyObject *res = element_get_text(self);
1934 Py_XINCREF(res);
1935 return res;
1936}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001937
Serhiy Storchakadde08152015-11-25 15:28:13 +02001938static PyObject*
1939element_tail_getter(ElementObject *self, void *closure)
1940{
1941 PyObject *res = element_get_tail(self);
1942 Py_XINCREF(res);
1943 return res;
1944}
1945
1946static PyObject*
1947element_attrib_getter(ElementObject *self, void *closure)
1948{
1949 PyObject *res;
1950 if (!self->extra) {
1951 if (create_extra(self, NULL) < 0)
1952 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001953 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001954 res = element_get_attrib(self);
1955 Py_XINCREF(res);
1956 return res;
1957}
Victor Stinner4d463432013-07-11 23:05:03 +02001958
/* macro for setter validation: a NULL value means "delete the attribute",
   which is refused with AttributeError by returning -1 from the enclosing
   function.  Wrapped in do { } while (0) so that the macro behaves as a
   single statement and must be followed by a semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1967
Serhiy Storchakadde08152015-11-25 15:28:13 +02001968static int
1969element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1970{
1971 _VALIDATE_ATTR_VALUE(value);
1972 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001973 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001974 return 0;
1975}
1976
1977static int
1978element_text_setter(ElementObject *self, PyObject *value, void *closure)
1979{
1980 _VALIDATE_ATTR_VALUE(value);
1981 Py_INCREF(value);
1982 Py_DECREF(JOIN_OBJ(self->text));
1983 self->text = value;
1984 return 0;
1985}
1986
1987static int
1988element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1989{
1990 _VALIDATE_ATTR_VALUE(value);
1991 Py_INCREF(value);
1992 Py_DECREF(JOIN_OBJ(self->tail));
1993 self->tail = value;
1994 return 0;
1995}
1996
1997static int
1998element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1999{
2000 _VALIDATE_ATTR_VALUE(value);
2001 if (!self->extra) {
2002 if (create_extra(self, NULL) < 0)
2003 return -1;
2004 }
2005 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03002006 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07002007 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002008}
2009
2010static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002011 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002012 0, /* sq_concat */
2013 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002014 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002015 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002016 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002017 0,
2018};
2019
Eli Bendersky64d11e62012-06-15 07:42:50 +03002020/******************************* Element iterator ****************************/
2021
2022/* ElementIterObject represents the iteration state over an XML element in
2023 * pre-order traversal. To keep track of which sub-element should be returned
2024 * next, a stack of parents is maintained. This is a standard stack-based
2025 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002026 * The stack is managed using a continuous array.
2027 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002028 * the current one is exhausted, and the next child to examine in that parent.
2029 */
2030typedef struct ParentLocator_t {
2031 ElementObject *parent;
2032 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002033} ParentLocator;
2034
2035typedef struct {
2036 PyObject_HEAD
2037 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002038 Py_ssize_t parent_stack_used;
2039 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002040 ElementObject *root_element;
2041 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002042 int gettext;
2043} ElementIterObject;
2044
2045
2046static void
2047elementiter_dealloc(ElementIterObject *it)
2048{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002049 Py_ssize_t i = it->parent_stack_used;
2050 it->parent_stack_used = 0;
2051 while (i--)
2052 Py_XDECREF(it->parent_stack[i].parent);
2053 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002054
2055 Py_XDECREF(it->sought_tag);
2056 Py_XDECREF(it->root_element);
2057
2058 PyObject_GC_UnTrack(it);
2059 PyObject_GC_Del(it);
2060}
2061
2062static int
2063elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2064{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002065 Py_ssize_t i = it->parent_stack_used;
2066 while (i--)
2067 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002068
2069 Py_VISIT(it->root_element);
2070 Py_VISIT(it->sought_tag);
2071 return 0;
2072}
2073
2074/* Helper function for elementiter_next. Add a new parent to the parent stack.
2075 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002076static int
2077parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002078{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002079 ParentLocator *item;
2080
2081 if (it->parent_stack_used >= it->parent_stack_size) {
2082 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2083 ParentLocator *parent_stack = it->parent_stack;
2084 PyMem_Resize(parent_stack, ParentLocator, new_size);
2085 if (parent_stack == NULL)
2086 return -1;
2087 it->parent_stack = parent_stack;
2088 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002089 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002090 item = it->parent_stack + it->parent_stack_used++;
2091 Py_INCREF(parent);
2092 item->parent = parent;
2093 item->child_index = 0;
2094 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002095}
2096
2097static PyObject *
2098elementiter_next(ElementIterObject *it)
2099{
2100 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002101 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102 * A short note on gettext: this function serves both the iter() and
2103 * itertext() methods to avoid code duplication. However, there are a few
2104 * small differences in the way these iterations work. Namely:
2105 * - itertext() only yields text from nodes that have it, and continues
2106 * iterating when a node doesn't have text (so it doesn't return any
2107 * node like iter())
2108 * - itertext() also has to handle tail, after finishing with all the
2109 * children of a node.
2110 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002111 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002112 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002113 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114
2115 while (1) {
2116 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002117 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118 * iterator is exhausted.
2119 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002120 if (!it->parent_stack_used) {
2121 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002122 PyErr_SetNone(PyExc_StopIteration);
2123 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002124 }
2125
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002126 elem = it->root_element; /* steals a reference */
2127 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002128 }
2129 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002130 /* See if there are children left to traverse in the current parent. If
2131 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002132 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002133 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2134 Py_ssize_t child_index = item->child_index;
2135 ElementObjectExtra *extra;
2136 elem = item->parent;
2137 extra = elem->extra;
2138 if (!extra || child_index >= extra->length) {
2139 it->parent_stack_used--;
2140 /* Note that extra condition on it->parent_stack_used here;
2141 * this is because itertext() is supposed to only return *inner*
2142 * text, not text following the element it began iteration with.
2143 */
2144 if (it->gettext && it->parent_stack_used) {
2145 text = element_get_tail(elem);
2146 goto gettext;
2147 }
2148 Py_DECREF(elem);
2149 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002150 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002151
2152 elem = (ElementObject *)extra->children[child_index];
2153 item->child_index++;
2154 Py_INCREF(elem);
2155 }
2156
2157 if (parent_stack_push_new(it, elem) < 0) {
2158 Py_DECREF(elem);
2159 PyErr_NoMemory();
2160 return NULL;
2161 }
2162 if (it->gettext) {
2163 text = element_get_text(elem);
2164 goto gettext;
2165 }
2166
2167 if (it->sought_tag == Py_None)
2168 return (PyObject *)elem;
2169
2170 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2171 if (rc > 0)
2172 return (PyObject *)elem;
2173
2174 Py_DECREF(elem);
2175 if (rc < 0)
2176 return NULL;
2177 continue;
2178
2179gettext:
2180 if (!text) {
2181 Py_DECREF(elem);
2182 return NULL;
2183 }
2184 if (text == Py_None) {
2185 Py_DECREF(elem);
2186 }
2187 else {
2188 Py_INCREF(text);
2189 Py_DECREF(elem);
2190 rc = PyObject_IsTrue(text);
2191 if (rc > 0)
2192 return text;
2193 Py_DECREF(text);
2194 if (rc < 0)
2195 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002196 }
2197 }
2198
2199 return NULL;
2200}
2201
2202
2203static PyTypeObject ElementIter_Type = {
2204 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002205 /* Using the module's name since the pure-Python implementation does not
2206 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002207 "_elementtree._element_iterator", /* tp_name */
2208 sizeof(ElementIterObject), /* tp_basicsize */
2209 0, /* tp_itemsize */
2210 /* methods */
2211 (destructor)elementiter_dealloc, /* tp_dealloc */
2212 0, /* tp_print */
2213 0, /* tp_getattr */
2214 0, /* tp_setattr */
2215 0, /* tp_reserved */
2216 0, /* tp_repr */
2217 0, /* tp_as_number */
2218 0, /* tp_as_sequence */
2219 0, /* tp_as_mapping */
2220 0, /* tp_hash */
2221 0, /* tp_call */
2222 0, /* tp_str */
2223 0, /* tp_getattro */
2224 0, /* tp_setattro */
2225 0, /* tp_as_buffer */
2226 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2227 0, /* tp_doc */
2228 (traverseproc)elementiter_traverse, /* tp_traverse */
2229 0, /* tp_clear */
2230 0, /* tp_richcompare */
2231 0, /* tp_weaklistoffset */
2232 PyObject_SelfIter, /* tp_iter */
2233 (iternextfunc)elementiter_next, /* tp_iternext */
2234 0, /* tp_methods */
2235 0, /* tp_members */
2236 0, /* tp_getset */
2237 0, /* tp_base */
2238 0, /* tp_dict */
2239 0, /* tp_descr_get */
2240 0, /* tp_descr_set */
2241 0, /* tp_dictoffset */
2242 0, /* tp_init */
2243 0, /* tp_alloc */
2244 0, /* tp_new */
2245};
2246
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002247#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002248
2249static PyObject *
2250create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2251{
2252 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002253
2254 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2255 if (!it)
2256 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257
Victor Stinner4d463432013-07-11 23:05:03 +02002258 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002259 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002260 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002261 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002262 it->root_element = self;
2263
Eli Bendersky64d11e62012-06-15 07:42:50 +03002264 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002265
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002266 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002267 if (it->parent_stack == NULL) {
2268 Py_DECREF(it);
2269 PyErr_NoMemory();
2270 return NULL;
2271 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002272 it->parent_stack_used = 0;
2273 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002274
Eli Bendersky64d11e62012-06-15 07:42:50 +03002275 return (PyObject *)it;
2276}
2277
2278
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279/* ==================================================================== */
2280/* the tree builder type */
2281
2282typedef struct {
2283 PyObject_HEAD
2284
Eli Bendersky58d548d2012-05-29 15:45:16 +03002285 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286
Antoine Pitrouee329312012-10-04 19:53:29 +02002287 PyObject *this; /* current node */
2288 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002289
Eli Bendersky58d548d2012-05-29 15:45:16 +03002290 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291
Eli Bendersky58d548d2012-05-29 15:45:16 +03002292 PyObject *stack; /* element stack */
2293 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002294
Eli Bendersky48d358b2012-05-30 17:57:50 +03002295 PyObject *element_factory;
2296
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002298 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002299 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2300 PyObject *end_event_obj;
2301 PyObject *start_ns_event_obj;
2302 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002303} TreeBuilderObject;
2304
Christian Heimes90aa7642007-12-19 02:45:37 +00002305#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002306
2307/* -------------------------------------------------------------------- */
2308/* constructor and destructor */
2309
Eli Bendersky58d548d2012-05-29 15:45:16 +03002310static PyObject *
2311treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002312{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002313 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2314 if (t != NULL) {
2315 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002316
Eli Bendersky58d548d2012-05-29 15:45:16 +03002317 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002318 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002319 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002320 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002323 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002324 t->stack = PyList_New(20);
2325 if (!t->stack) {
2326 Py_DECREF(t->this);
2327 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002328 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002329 return NULL;
2330 }
2331 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002333 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002334 t->start_event_obj = t->end_event_obj = NULL;
2335 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2336 }
2337 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338}
2339
Serhiy Storchakacb985562015-05-04 15:32:48 +03002340/*[clinic input]
2341_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002342
Serhiy Storchakacb985562015-05-04 15:32:48 +03002343 element_factory: object = NULL
2344
2345[clinic start generated code]*/
2346
2347static int
2348_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2349 PyObject *element_factory)
2350/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2351{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002352 if (element_factory) {
2353 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002354 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002355 }
2356
Eli Bendersky58d548d2012-05-29 15:45:16 +03002357 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358}
2359
Eli Bendersky48d358b2012-05-30 17:57:50 +03002360static int
2361treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2362{
2363 Py_VISIT(self->root);
2364 Py_VISIT(self->this);
2365 Py_VISIT(self->last);
2366 Py_VISIT(self->data);
2367 Py_VISIT(self->stack);
2368 Py_VISIT(self->element_factory);
2369 return 0;
2370}
2371
2372static int
2373treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002375 Py_CLEAR(self->end_ns_event_obj);
2376 Py_CLEAR(self->start_ns_event_obj);
2377 Py_CLEAR(self->end_event_obj);
2378 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002379 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002380 Py_CLEAR(self->stack);
2381 Py_CLEAR(self->data);
2382 Py_CLEAR(self->last);
2383 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002384 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002385 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386 return 0;
2387}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388
Eli Bendersky48d358b2012-05-30 17:57:50 +03002389static void
2390treebuilder_dealloc(TreeBuilderObject *self)
2391{
2392 PyObject_GC_UnTrack(self);
2393 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002394 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395}
2396
2397/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002398/* helpers for handling of arbitrary element-like objects */
2399
2400static int
2401treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2402 PyObject **dest, _Py_Identifier *name)
2403{
2404 if (Element_CheckExact(element)) {
2405 Py_DECREF(JOIN_OBJ(*dest));
2406 *dest = JOIN_SET(data, PyList_CheckExact(data));
2407 return 0;
2408 }
2409 else {
2410 PyObject *joined = list_join(data);
2411 int r;
2412 if (joined == NULL)
2413 return -1;
2414 r = _PyObject_SetAttrId(element, name, joined);
2415 Py_DECREF(joined);
2416 return r;
2417 }
2418}
2419
2420/* These two functions steal a reference to data */
2421static int
2422treebuilder_set_element_text(PyObject *element, PyObject *data)
2423{
2424 _Py_IDENTIFIER(text);
2425 return treebuilder_set_element_text_or_tail(
2426 element, data, &((ElementObject *) element)->text, &PyId_text);
2427}
2428
2429static int
2430treebuilder_set_element_tail(PyObject *element, PyObject *data)
2431{
2432 _Py_IDENTIFIER(tail);
2433 return treebuilder_set_element_text_or_tail(
2434 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2435}
2436
2437static int
2438treebuilder_add_subelement(PyObject *element, PyObject *child)
2439{
2440 _Py_IDENTIFIER(append);
2441 if (Element_CheckExact(element)) {
2442 ElementObject *elem = (ElementObject *) element;
2443 return element_add_subelement(elem, child);
2444 }
2445 else {
2446 PyObject *res;
2447 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2448 if (res == NULL)
2449 return -1;
2450 Py_DECREF(res);
2451 return 0;
2452 }
2453}
2454
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002455LOCAL(int)
2456treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2457 PyObject *node)
2458{
2459 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002460 PyObject *res;
2461 PyObject *event = PyTuple_Pack(2, action, node);
2462 if (event == NULL)
2463 return -1;
2464 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2465 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002466 if (res == NULL)
2467 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002468 Py_DECREF(res);
2469 }
2470 return 0;
2471}
2472
Antoine Pitrouee329312012-10-04 19:53:29 +02002473/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474/* handlers */
2475
2476LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2478 PyObject* attrib)
2479{
2480 PyObject* node;
2481 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002482 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002483
2484 if (self->data) {
2485 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002486 if (treebuilder_set_element_text(self->last, self->data))
2487 return NULL;
2488 }
2489 else {
2490 if (treebuilder_set_element_tail(self->last, self->data))
2491 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492 }
2493 self->data = NULL;
2494 }
2495
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002496 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002497 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002498 } else if (attrib == Py_None) {
2499 attrib = PyDict_New();
2500 if (!attrib)
2501 return NULL;
2502 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2503 Py_DECREF(attrib);
2504 }
2505 else {
2506 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002507 }
2508 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002510 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511
Antoine Pitrouee329312012-10-04 19:53:29 +02002512 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513
2514 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002515 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002516 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 } else {
2518 if (self->root) {
2519 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002520 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 "multiple elements on top level"
2522 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002523 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524 }
2525 Py_INCREF(node);
2526 self->root = node;
2527 }
2528
2529 if (self->index < PyList_GET_SIZE(self->stack)) {
2530 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002531 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 Py_INCREF(this);
2533 } else {
2534 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002535 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 }
2537 self->index++;
2538
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002540 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002542 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002544 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2545 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546
2547 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002548
2549 error:
2550 Py_DECREF(node);
2551 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552}
2553
2554LOCAL(PyObject*)
2555treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2556{
2557 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002558 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002559 /* ignore calls to data before the first call to start */
2560 Py_RETURN_NONE;
2561 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 /* store the first item as is */
2563 Py_INCREF(data); self->data = data;
2564 } else {
2565 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002566 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2567 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002568 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002569 /* expat often generates single character data sections; handle
2570 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002571 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2572 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002574 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 } else if (PyList_CheckExact(self->data)) {
2576 if (PyList_Append(self->data, data) < 0)
2577 return NULL;
2578 } else {
2579 PyObject* list = PyList_New(2);
2580 if (!list)
2581 return NULL;
2582 PyList_SET_ITEM(list, 0, self->data);
2583 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2584 self->data = list;
2585 }
2586 }
2587
2588 Py_RETURN_NONE;
2589}
2590
2591LOCAL(PyObject*)
2592treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2593{
2594 PyObject* item;
2595
2596 if (self->data) {
2597 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002598 if (treebuilder_set_element_text(self->last, self->data))
2599 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002601 if (treebuilder_set_element_tail(self->last, self->data))
2602 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603 }
2604 self->data = NULL;
2605 }
2606
2607 if (self->index == 0) {
2608 PyErr_SetString(
2609 PyExc_IndexError,
2610 "pop from empty stack"
2611 );
2612 return NULL;
2613 }
2614
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002615 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002616 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002617 self->index--;
2618 self->this = PyList_GET_ITEM(self->stack, self->index);
2619 Py_INCREF(self->this);
2620 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002622 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2623 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624
2625 Py_INCREF(self->last);
2626 return (PyObject*) self->last;
2627}
2628
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629/* -------------------------------------------------------------------- */
2630/* methods (in alphabetical order) */
2631
Serhiy Storchakacb985562015-05-04 15:32:48 +03002632/*[clinic input]
2633_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002634
Serhiy Storchakacb985562015-05-04 15:32:48 +03002635 data: object
2636 /
2637
2638[clinic start generated code]*/
2639
2640static PyObject *
2641_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2642/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2643{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002644 return treebuilder_handle_data(self, data);
2645}
2646
Serhiy Storchakacb985562015-05-04 15:32:48 +03002647/*[clinic input]
2648_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002649
Serhiy Storchakacb985562015-05-04 15:32:48 +03002650 tag: object
2651 /
2652
2653[clinic start generated code]*/
2654
2655static PyObject *
2656_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2657/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2658{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002659 return treebuilder_handle_end(self, tag);
2660}
2661
2662LOCAL(PyObject*)
2663treebuilder_done(TreeBuilderObject* self)
2664{
2665 PyObject* res;
2666
2667 /* FIXME: check stack size? */
2668
2669 if (self->root)
2670 res = self->root;
2671 else
2672 res = Py_None;
2673
2674 Py_INCREF(res);
2675 return res;
2676}
2677
Serhiy Storchakacb985562015-05-04 15:32:48 +03002678/*[clinic input]
2679_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002680
Serhiy Storchakacb985562015-05-04 15:32:48 +03002681[clinic start generated code]*/
2682
2683static PyObject *
2684_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2685/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2686{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002687 return treebuilder_done(self);
2688}
2689
Serhiy Storchakacb985562015-05-04 15:32:48 +03002690/*[clinic input]
2691_elementtree.TreeBuilder.start
2692
2693 tag: object
2694 attrs: object = None
2695 /
2696
2697[clinic start generated code]*/
2698
2699static PyObject *
2700_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2701 PyObject *attrs)
2702/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002704 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705}
2706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707/* ==================================================================== */
2708/* the expat interface */
2709
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002712
2713/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2714 * cached globally without being in per-module state.
2715 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002716static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
Eli Bendersky52467b12012-06-01 07:13:08 +03002719static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2720 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2721
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722typedef struct {
2723 PyObject_HEAD
2724
2725 XML_Parser parser;
2726
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002727 PyObject *target;
2728 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002730 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002732 PyObject *handle_start;
2733 PyObject *handle_data;
2734 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002736 PyObject *handle_comment;
2737 PyObject *handle_pi;
2738 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002740 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002741
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742} XMLParserObject;
2743
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002744static PyObject*
2745_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2746static PyObject *
2747_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2748 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002750/* helpers */
2751
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752LOCAL(PyObject*)
2753makeuniversal(XMLParserObject* self, const char* string)
2754{
2755 /* convert a UTF-8 tag/attribute name from the expat parser
2756 to a universal name string */
2757
Antoine Pitrouc1948842012-10-01 23:40:37 +02002758 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759 PyObject* key;
2760 PyObject* value;
2761
2762 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002763 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764 if (!key)
2765 return NULL;
2766
2767 value = PyDict_GetItem(self->names, key);
2768
2769 if (value) {
2770 Py_INCREF(value);
2771 } else {
2772 /* new name. convert to universal name, and decode as
2773 necessary */
2774
2775 PyObject* tag;
2776 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002777 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778
2779 /* look for namespace separator */
2780 for (i = 0; i < size; i++)
2781 if (string[i] == '}')
2782 break;
2783 if (i != size) {
2784 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002785 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002786 if (tag == NULL) {
2787 Py_DECREF(key);
2788 return NULL;
2789 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002790 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791 p[0] = '{';
2792 memcpy(p+1, string, size);
2793 size++;
2794 } else {
2795 /* plain name; use key as tag */
2796 Py_INCREF(key);
2797 tag = key;
2798 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002799
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002800 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002801 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002802 value = PyUnicode_DecodeUTF8(p, size, "strict");
2803 Py_DECREF(tag);
2804 if (!value) {
2805 Py_DECREF(key);
2806 return NULL;
2807 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002808
2809 /* add to names dictionary */
2810 if (PyDict_SetItem(self->names, key, value) < 0) {
2811 Py_DECREF(key);
2812 Py_DECREF(value);
2813 return NULL;
2814 }
2815 }
2816
2817 Py_DECREF(key);
2818 return value;
2819}
2820
Eli Bendersky5b77d812012-03-16 08:20:05 +02002821/* Set the ParseError exception with the given parameters.
2822 * If message is not NULL, it's used as the error string. Otherwise, the
2823 * message string is the default for the given error_code.
2824*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002825static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002826expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2827 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002829 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002830 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002831
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002832 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002833 message ? message : EXPAT(ErrorString)(error_code),
2834 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002835 if (errmsg == NULL)
2836 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002837
Eli Bendersky532d03e2013-08-10 08:00:39 -07002838 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002839 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002840 if (!error)
2841 return;
2842
Eli Bendersky5b77d812012-03-16 08:20:05 +02002843 /* Add code and position attributes */
2844 code = PyLong_FromLong((long)error_code);
2845 if (!code) {
2846 Py_DECREF(error);
2847 return;
2848 }
2849 if (PyObject_SetAttrString(error, "code", code) == -1) {
2850 Py_DECREF(error);
2851 Py_DECREF(code);
2852 return;
2853 }
2854 Py_DECREF(code);
2855
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002856 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002857 if (!position) {
2858 Py_DECREF(error);
2859 return;
2860 }
2861 if (PyObject_SetAttrString(error, "position", position) == -1) {
2862 Py_DECREF(error);
2863 Py_DECREF(position);
2864 return;
2865 }
2866 Py_DECREF(position);
2867
Eli Bendersky532d03e2013-08-10 08:00:39 -07002868 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002869 Py_DECREF(error);
2870}
2871
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002872/* -------------------------------------------------------------------- */
2873/* handlers */
2874
2875static void
2876expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2877 int data_len)
2878{
2879 PyObject* key;
2880 PyObject* value;
2881 PyObject* res;
2882
2883 if (data_len < 2 || data_in[0] != '&')
2884 return;
2885
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002886 if (PyErr_Occurred())
2887 return;
2888
Neal Norwitz0269b912007-08-08 06:56:02 +00002889 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002890 if (!key)
2891 return;
2892
2893 value = PyDict_GetItem(self->entity, key);
2894
2895 if (value) {
2896 if (TreeBuilder_CheckExact(self->target))
2897 res = treebuilder_handle_data(
2898 (TreeBuilderObject*) self->target, value
2899 );
2900 else if (self->handle_data)
2901 res = PyObject_CallFunction(self->handle_data, "O", value);
2902 else
2903 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002905 } else if (!PyErr_Occurred()) {
2906 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002907 char message[128] = "undefined entity ";
2908 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002909 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002910 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002912 EXPAT(GetErrorColumnNumber)(self->parser),
2913 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914 );
2915 }
2916
2917 Py_DECREF(key);
2918}
2919
2920static void
2921expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2922 const XML_Char **attrib_in)
2923{
2924 PyObject* res;
2925 PyObject* tag;
2926 PyObject* attrib;
2927 int ok;
2928
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002929 if (PyErr_Occurred())
2930 return;
2931
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932 /* tag name */
2933 tag = makeuniversal(self, tag_in);
2934 if (!tag)
2935 return; /* parser will look for errors */
2936
2937 /* attributes */
2938 if (attrib_in[0]) {
2939 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002940 if (!attrib) {
2941 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002942 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002943 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 while (attrib_in[0] && attrib_in[1]) {
2945 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002946 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947 if (!key || !value) {
2948 Py_XDECREF(value);
2949 Py_XDECREF(key);
2950 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002951 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952 return;
2953 }
2954 ok = PyDict_SetItem(attrib, key, value);
2955 Py_DECREF(value);
2956 Py_DECREF(key);
2957 if (ok < 0) {
2958 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002959 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 return;
2961 }
2962 attrib_in += 2;
2963 }
2964 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002965 Py_INCREF(Py_None);
2966 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002967 }
2968
2969 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 /* shortcut */
2971 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2972 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002973 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002974 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002975 if (attrib == Py_None) {
2976 Py_DECREF(attrib);
2977 attrib = PyDict_New();
2978 if (!attrib) {
2979 Py_DECREF(tag);
2980 return;
2981 }
2982 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002984 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002985 res = NULL;
2986
2987 Py_DECREF(tag);
2988 Py_DECREF(attrib);
2989
2990 Py_XDECREF(res);
2991}
2992
2993static void
2994expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2995 int data_len)
2996{
2997 PyObject* data;
2998 PyObject* res;
2999
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003000 if (PyErr_Occurred())
3001 return;
3002
Neal Norwitz0269b912007-08-08 06:56:02 +00003003 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003004 if (!data)
3005 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006
3007 if (TreeBuilder_CheckExact(self->target))
3008 /* shortcut */
3009 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3010 else if (self->handle_data)
3011 res = PyObject_CallFunction(self->handle_data, "O", data);
3012 else
3013 res = NULL;
3014
3015 Py_DECREF(data);
3016
3017 Py_XDECREF(res);
3018}
3019
3020static void
3021expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3022{
3023 PyObject* tag;
3024 PyObject* res = NULL;
3025
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003026 if (PyErr_Occurred())
3027 return;
3028
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029 if (TreeBuilder_CheckExact(self->target))
3030 /* shortcut */
3031 /* the standard tree builder doesn't look at the end tag */
3032 res = treebuilder_handle_end(
3033 (TreeBuilderObject*) self->target, Py_None
3034 );
3035 else if (self->handle_end) {
3036 tag = makeuniversal(self, tag_in);
3037 if (tag) {
3038 res = PyObject_CallFunction(self->handle_end, "O", tag);
3039 Py_DECREF(tag);
3040 }
3041 }
3042
3043 Py_XDECREF(res);
3044}
3045
3046static void
3047expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3048 const XML_Char *uri)
3049{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003050 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3051 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003052
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003053 if (PyErr_Occurred())
3054 return;
3055
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003056 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003057 return;
3058
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003059 if (!uri)
3060 uri = "";
3061 if (!prefix)
3062 prefix = "";
3063
3064 parcel = Py_BuildValue("ss", prefix, uri);
3065 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003066 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003067 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3068 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003069}
3070
3071static void
3072expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3073{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003074 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3075
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003076 if (PyErr_Occurred())
3077 return;
3078
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003079 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003080 return;
3081
3082 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083}
3084
3085static void
3086expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3087{
3088 PyObject* comment;
3089 PyObject* res;
3090
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003091 if (PyErr_Occurred())
3092 return;
3093
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003095 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003096 if (comment) {
3097 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3098 Py_XDECREF(res);
3099 Py_DECREF(comment);
3100 }
3101 }
3102}
3103
Eli Bendersky45839902013-01-13 05:14:47 -08003104static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003105expat_start_doctype_handler(XMLParserObject *self,
3106 const XML_Char *doctype_name,
3107 const XML_Char *sysid,
3108 const XML_Char *pubid,
3109 int has_internal_subset)
3110{
3111 PyObject *self_pyobj = (PyObject *)self;
3112 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3113 PyObject *parser_doctype = NULL;
3114 PyObject *res = NULL;
3115
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003116 if (PyErr_Occurred())
3117 return;
3118
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003119 doctype_name_obj = makeuniversal(self, doctype_name);
3120 if (!doctype_name_obj)
3121 return;
3122
3123 if (sysid) {
3124 sysid_obj = makeuniversal(self, sysid);
3125 if (!sysid_obj) {
3126 Py_DECREF(doctype_name_obj);
3127 return;
3128 }
3129 } else {
3130 Py_INCREF(Py_None);
3131 sysid_obj = Py_None;
3132 }
3133
3134 if (pubid) {
3135 pubid_obj = makeuniversal(self, pubid);
3136 if (!pubid_obj) {
3137 Py_DECREF(doctype_name_obj);
3138 Py_DECREF(sysid_obj);
3139 return;
3140 }
3141 } else {
3142 Py_INCREF(Py_None);
3143 pubid_obj = Py_None;
3144 }
3145
3146 /* If the target has a handler for doctype, call it. */
3147 if (self->handle_doctype) {
3148 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3149 doctype_name_obj, pubid_obj, sysid_obj);
3150 Py_CLEAR(res);
3151 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003152 else {
3153 /* Now see if the parser itself has a doctype method. If yes and it's
3154 * a custom method, call it but warn about deprecation. If it's only
3155 * the vanilla XMLParser method, do nothing.
3156 */
3157 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3158 if (parser_doctype &&
3159 !(PyCFunction_Check(parser_doctype) &&
3160 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3161 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003162 (PyCFunction) _elementtree_XMLParser_doctype)) {
3163 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3164 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003165 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003166 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003167 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003168 res = PyObject_CallFunction(parser_doctype, "OOO",
3169 doctype_name_obj, pubid_obj, sysid_obj);
3170 Py_CLEAR(res);
3171 }
3172 }
3173
3174clear:
3175 Py_XDECREF(parser_doctype);
3176 Py_DECREF(doctype_name_obj);
3177 Py_DECREF(pubid_obj);
3178 Py_DECREF(sysid_obj);
3179}
3180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003181static void
3182expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3183 const XML_Char* data_in)
3184{
3185 PyObject* target;
3186 PyObject* data;
3187 PyObject* res;
3188
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003189 if (PyErr_Occurred())
3190 return;
3191
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003193 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3194 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003195 if (target && data) {
3196 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3197 Py_XDECREF(res);
3198 Py_DECREF(data);
3199 Py_DECREF(target);
3200 } else {
3201 Py_XDECREF(data);
3202 Py_XDECREF(target);
3203 }
3204 }
3205}
3206
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003207/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003208
Eli Bendersky52467b12012-06-01 07:13:08 +03003209static PyObject *
3210xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003211{
Eli Bendersky52467b12012-06-01 07:13:08 +03003212 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3213 if (self) {
3214 self->parser = NULL;
3215 self->target = self->entity = self->names = NULL;
3216 self->handle_start = self->handle_data = self->handle_end = NULL;
3217 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003218 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003219 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003220 return (PyObject *)self;
3221}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003222
Serhiy Storchakacb985562015-05-04 15:32:48 +03003223/*[clinic input]
3224_elementtree.XMLParser.__init__
3225
3226 html: object = NULL
3227 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003228 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003229
3230[clinic start generated code]*/
3231
Eli Bendersky52467b12012-06-01 07:13:08 +03003232static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003233_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3234 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003235/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003236{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003237 self->entity = PyDict_New();
3238 if (!self->entity)
3239 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240
Serhiy Storchakacb985562015-05-04 15:32:48 +03003241 self->names = PyDict_New();
3242 if (!self->names) {
3243 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003244 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003246
Serhiy Storchakacb985562015-05-04 15:32:48 +03003247 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3248 if (!self->parser) {
3249 Py_CLEAR(self->entity);
3250 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 }
3254
Eli Bendersky52467b12012-06-01 07:13:08 +03003255 if (target) {
3256 Py_INCREF(target);
3257 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003258 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003260 Py_CLEAR(self->entity);
3261 Py_CLEAR(self->names);
3262 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003263 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003265 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267
Serhiy Storchakacb985562015-05-04 15:32:48 +03003268 self->handle_start = PyObject_GetAttrString(target, "start");
3269 self->handle_data = PyObject_GetAttrString(target, "data");
3270 self->handle_end = PyObject_GetAttrString(target, "end");
3271 self->handle_comment = PyObject_GetAttrString(target, "comment");
3272 self->handle_pi = PyObject_GetAttrString(target, "pi");
3273 self->handle_close = PyObject_GetAttrString(target, "close");
3274 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275
3276 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003277
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003281 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 (XML_StartElementHandler) expat_start_handler,
3283 (XML_EndElementHandler) expat_end_handler
3284 );
3285 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003286 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 (XML_DefaultHandler) expat_default_handler
3288 );
3289 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003290 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 (XML_CharacterDataHandler) expat_data_handler
3292 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003295 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 (XML_CommentHandler) expat_comment_handler
3297 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003298 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003300 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 (XML_ProcessingInstructionHandler) expat_pi_handler
3302 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003303 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003305 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3306 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003308 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003309 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003310 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311
Eli Bendersky52467b12012-06-01 07:13:08 +03003312 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003313}
3314
Eli Bendersky52467b12012-06-01 07:13:08 +03003315static int
3316xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3317{
3318 Py_VISIT(self->handle_close);
3319 Py_VISIT(self->handle_pi);
3320 Py_VISIT(self->handle_comment);
3321 Py_VISIT(self->handle_end);
3322 Py_VISIT(self->handle_data);
3323 Py_VISIT(self->handle_start);
3324
3325 Py_VISIT(self->target);
3326 Py_VISIT(self->entity);
3327 Py_VISIT(self->names);
3328
3329 return 0;
3330}
3331
3332static int
3333xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334{
3335 EXPAT(ParserFree)(self->parser);
3336
Antoine Pitrouc1948842012-10-01 23:40:37 +02003337 Py_CLEAR(self->handle_close);
3338 Py_CLEAR(self->handle_pi);
3339 Py_CLEAR(self->handle_comment);
3340 Py_CLEAR(self->handle_end);
3341 Py_CLEAR(self->handle_data);
3342 Py_CLEAR(self->handle_start);
3343 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344
Antoine Pitrouc1948842012-10-01 23:40:37 +02003345 Py_CLEAR(self->target);
3346 Py_CLEAR(self->entity);
3347 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348
Eli Bendersky52467b12012-06-01 07:13:08 +03003349 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350}
3351
Eli Bendersky52467b12012-06-01 07:13:08 +03003352static void
3353xmlparser_dealloc(XMLParserObject* self)
3354{
3355 PyObject_GC_UnTrack(self);
3356 xmlparser_gc_clear(self);
3357 Py_TYPE(self)->tp_free((PyObject *)self);
3358}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359
3360LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003361expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003362{
3363 int ok;
3364
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003365 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3367
3368 if (PyErr_Occurred())
3369 return NULL;
3370
3371 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003372 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003373 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003374 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003375 EXPAT(GetErrorColumnNumber)(self->parser),
3376 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003377 );
3378 return NULL;
3379 }
3380
3381 Py_RETURN_NONE;
3382}
3383
Serhiy Storchakacb985562015-05-04 15:32:48 +03003384/*[clinic input]
3385_elementtree.XMLParser.close
3386
3387[clinic start generated code]*/
3388
3389static PyObject *
3390_elementtree_XMLParser_close_impl(XMLParserObject *self)
3391/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392{
3393 /* end feeding data to parser */
3394
3395 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003396 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003397 if (!res)
3398 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003400 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003401 Py_DECREF(res);
3402 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003403 }
3404 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003405 Py_DECREF(res);
3406 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003407 }
3408 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003409 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003410 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411}
3412
Serhiy Storchakacb985562015-05-04 15:32:48 +03003413/*[clinic input]
3414_elementtree.XMLParser.feed
3415
3416 data: object
3417 /
3418
3419[clinic start generated code]*/
3420
3421static PyObject *
3422_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3423/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003424{
3425 /* feed data to parser */
3426
Serhiy Storchakacb985562015-05-04 15:32:48 +03003427 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003428 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003429 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3430 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003431 return NULL;
3432 if (data_len > INT_MAX) {
3433 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3434 return NULL;
3435 }
3436 /* Explicitly set UTF-8 encoding. Return code ignored. */
3437 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003438 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003439 }
3440 else {
3441 Py_buffer view;
3442 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003443 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003444 return NULL;
3445 if (view.len > INT_MAX) {
3446 PyBuffer_Release(&view);
3447 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3448 return NULL;
3449 }
3450 res = expat_parse(self, view.buf, (int)view.len, 0);
3451 PyBuffer_Release(&view);
3452 return res;
3453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454}
3455
Serhiy Storchakacb985562015-05-04 15:32:48 +03003456/*[clinic input]
3457_elementtree.XMLParser._parse_whole
3458
3459 file: object
3460 /
3461
3462[clinic start generated code]*/
3463
3464static PyObject *
3465_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3466/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003467{
Eli Benderskya3699232013-05-19 18:47:23 -07003468 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469 PyObject* reader;
3470 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003471 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003472 PyObject* res;
3473
Serhiy Storchakacb985562015-05-04 15:32:48 +03003474 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475 if (!reader)
3476 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003477
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478 /* read from open file object */
3479 for (;;) {
3480
3481 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3482
3483 if (!buffer) {
3484 /* read failed (e.g. due to KeyboardInterrupt) */
3485 Py_DECREF(reader);
3486 return NULL;
3487 }
3488
Eli Benderskyf996e772012-03-16 05:53:30 +02003489 if (PyUnicode_CheckExact(buffer)) {
3490 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003491 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003492 Py_DECREF(buffer);
3493 break;
3494 }
3495 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003496 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003497 if (!temp) {
3498 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003499 Py_DECREF(reader);
3500 return NULL;
3501 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003502 buffer = temp;
3503 }
3504 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003505 Py_DECREF(buffer);
3506 break;
3507 }
3508
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003509 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3510 Py_DECREF(buffer);
3511 Py_DECREF(reader);
3512 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3513 return NULL;
3514 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003516 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003517 );
3518
3519 Py_DECREF(buffer);
3520
3521 if (!res) {
3522 Py_DECREF(reader);
3523 return NULL;
3524 }
3525 Py_DECREF(res);
3526
3527 }
3528
3529 Py_DECREF(reader);
3530
3531 res = expat_parse(self, "", 0, 1);
3532
3533 if (res && TreeBuilder_CheckExact(self->target)) {
3534 Py_DECREF(res);
3535 return treebuilder_done((TreeBuilderObject*) self->target);
3536 }
3537
3538 return res;
3539}
3540
Serhiy Storchakacb985562015-05-04 15:32:48 +03003541/*[clinic input]
3542_elementtree.XMLParser.doctype
3543
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003544 name: object
3545 pubid: object
3546 system: object
3547 /
3548
Serhiy Storchakacb985562015-05-04 15:32:48 +03003549[clinic start generated code]*/
3550
3551static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003552_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3553 PyObject *pubid, PyObject *system)
3554/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003555{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003556 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3557 "This method of XMLParser is deprecated. Define"
3558 " doctype() method on the TreeBuilder target.",
3559 1) < 0) {
3560 return NULL;
3561 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003562 Py_RETURN_NONE;
3563}
3564
Serhiy Storchakacb985562015-05-04 15:32:48 +03003565/*[clinic input]
3566_elementtree.XMLParser._setevents
3567
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003568 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003569 events_to_report: object = None
3570 /
3571
3572[clinic start generated code]*/
3573
3574static PyObject *
3575_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3576 PyObject *events_queue,
3577 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003578/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003579{
3580 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003581 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003582 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003583 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003584
3585 if (!TreeBuilder_CheckExact(self->target)) {
3586 PyErr_SetString(
3587 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003588 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589 "targets"
3590 );
3591 return NULL;
3592 }
3593
3594 target = (TreeBuilderObject*) self->target;
3595
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003596 events_append = PyObject_GetAttrString(events_queue, "append");
3597 if (events_append == NULL)
3598 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003599 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600
3601 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003602 Py_CLEAR(target->start_event_obj);
3603 Py_CLEAR(target->end_event_obj);
3604 Py_CLEAR(target->start_ns_event_obj);
3605 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003607 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003609 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610 Py_RETURN_NONE;
3611 }
3612
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003613 if (!(events_seq = PySequence_Fast(events_to_report,
3614 "events must be a sequence"))) {
3615 return NULL;
3616 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003617
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003618 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003619 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3620 char *event_name = NULL;
3621 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003622 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003623 } else if (PyBytes_Check(event_name_obj)) {
3624 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003625 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003626 if (event_name == NULL) {
3627 Py_DECREF(events_seq);
3628 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3629 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003630 }
3631
3632 Py_INCREF(event_name_obj);
3633 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003634 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003635 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003636 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003637 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003638 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003639 EXPAT(SetNamespaceDeclHandler)(
3640 self->parser,
3641 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3642 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3643 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003644 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003645 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646 EXPAT(SetNamespaceDeclHandler)(
3647 self->parser,
3648 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3649 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3650 );
3651 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003652 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003653 Py_DECREF(events_seq);
3654 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655 return NULL;
3656 }
3657 }
3658
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003659 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003660 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003661}
3662
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003663static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003664xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003666 if (PyUnicode_Check(nameobj)) {
3667 PyObject* res;
3668 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3669 res = self->entity;
3670 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3671 res = self->target;
3672 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3673 return PyUnicode_FromFormat(
3674 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003676 }
3677 else
3678 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679
Alexander Belopolskye239d232010-12-08 23:31:48 +00003680 Py_INCREF(res);
3681 return res;
3682 }
3683 generic:
3684 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685}
3686
Serhiy Storchakacb985562015-05-04 15:32:48 +03003687#include "clinic/_elementtree.c.h"
3688
3689static PyMethodDef element_methods[] = {
3690
3691 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3692
3693 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3694 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3695
3696 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3697 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3698 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3699
3700 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3701 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3702 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3703 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3704
3705 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3706 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3707 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3708
3709 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3710 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3711
3712 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3713 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3714
3715 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3716
3717 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3718 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3719 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3720 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3721 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3722
3723 {NULL, NULL}
3724};
3725
3726static PyMappingMethods element_as_mapping = {
3727 (lenfunc) element_length,
3728 (binaryfunc) element_subscr,
3729 (objobjargproc) element_ass_subscr,
3730};
3731
Serhiy Storchakadde08152015-11-25 15:28:13 +02003732static PyGetSetDef element_getsetlist[] = {
3733 {"tag",
3734 (getter)element_tag_getter,
3735 (setter)element_tag_setter,
3736 "A string identifying what kind of data this element represents"},
3737 {"text",
3738 (getter)element_text_getter,
3739 (setter)element_text_setter,
3740 "A string of text directly after the start tag, or None"},
3741 {"tail",
3742 (getter)element_tail_getter,
3743 (setter)element_tail_setter,
3744 "A string of text directly after the end tag, or None"},
3745 {"attrib",
3746 (getter)element_attrib_getter,
3747 (setter)element_attrib_setter,
3748 "A dictionary containing the element's attributes"},
3749 {NULL},
3750};
3751
Serhiy Storchakacb985562015-05-04 15:32:48 +03003752static PyTypeObject Element_Type = {
3753 PyVarObject_HEAD_INIT(NULL, 0)
3754 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3755 /* methods */
3756 (destructor)element_dealloc, /* tp_dealloc */
3757 0, /* tp_print */
3758 0, /* tp_getattr */
3759 0, /* tp_setattr */
3760 0, /* tp_reserved */
3761 (reprfunc)element_repr, /* tp_repr */
3762 0, /* tp_as_number */
3763 &element_as_sequence, /* tp_as_sequence */
3764 &element_as_mapping, /* tp_as_mapping */
3765 0, /* tp_hash */
3766 0, /* tp_call */
3767 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003768 PyObject_GenericGetAttr, /* tp_getattro */
3769 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003770 0, /* tp_as_buffer */
3771 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3772 /* tp_flags */
3773 0, /* tp_doc */
3774 (traverseproc)element_gc_traverse, /* tp_traverse */
3775 (inquiry)element_gc_clear, /* tp_clear */
3776 0, /* tp_richcompare */
3777 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3778 0, /* tp_iter */
3779 0, /* tp_iternext */
3780 element_methods, /* tp_methods */
3781 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003782 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003783 0, /* tp_base */
3784 0, /* tp_dict */
3785 0, /* tp_descr_get */
3786 0, /* tp_descr_set */
3787 0, /* tp_dictoffset */
3788 (initproc)element_init, /* tp_init */
3789 PyType_GenericAlloc, /* tp_alloc */
3790 element_new, /* tp_new */
3791 0, /* tp_free */
3792};
3793
3794static PyMethodDef treebuilder_methods[] = {
3795 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3796 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3797 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3798 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3799 {NULL, NULL}
3800};
3801
3802static PyTypeObject TreeBuilder_Type = {
3803 PyVarObject_HEAD_INIT(NULL, 0)
3804 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3805 /* methods */
3806 (destructor)treebuilder_dealloc, /* tp_dealloc */
3807 0, /* tp_print */
3808 0, /* tp_getattr */
3809 0, /* tp_setattr */
3810 0, /* tp_reserved */
3811 0, /* tp_repr */
3812 0, /* tp_as_number */
3813 0, /* tp_as_sequence */
3814 0, /* tp_as_mapping */
3815 0, /* tp_hash */
3816 0, /* tp_call */
3817 0, /* tp_str */
3818 0, /* tp_getattro */
3819 0, /* tp_setattro */
3820 0, /* tp_as_buffer */
3821 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3822 /* tp_flags */
3823 0, /* tp_doc */
3824 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3825 (inquiry)treebuilder_gc_clear, /* tp_clear */
3826 0, /* tp_richcompare */
3827 0, /* tp_weaklistoffset */
3828 0, /* tp_iter */
3829 0, /* tp_iternext */
3830 treebuilder_methods, /* tp_methods */
3831 0, /* tp_members */
3832 0, /* tp_getset */
3833 0, /* tp_base */
3834 0, /* tp_dict */
3835 0, /* tp_descr_get */
3836 0, /* tp_descr_set */
3837 0, /* tp_dictoffset */
3838 _elementtree_TreeBuilder___init__, /* tp_init */
3839 PyType_GenericAlloc, /* tp_alloc */
3840 treebuilder_new, /* tp_new */
3841 0, /* tp_free */
3842};
3843
3844static PyMethodDef xmlparser_methods[] = {
3845 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3846 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3847 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3848 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3849 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3850 {NULL, NULL}
3851};
3852
Neal Norwitz227b5332006-03-22 09:28:35 +00003853static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003854 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003855 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003856 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003857 (destructor)xmlparser_dealloc, /* tp_dealloc */
3858 0, /* tp_print */
3859 0, /* tp_getattr */
3860 0, /* tp_setattr */
3861 0, /* tp_reserved */
3862 0, /* tp_repr */
3863 0, /* tp_as_number */
3864 0, /* tp_as_sequence */
3865 0, /* tp_as_mapping */
3866 0, /* tp_hash */
3867 0, /* tp_call */
3868 0, /* tp_str */
3869 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3870 0, /* tp_setattro */
3871 0, /* tp_as_buffer */
3872 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3873 /* tp_flags */
3874 0, /* tp_doc */
3875 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3876 (inquiry)xmlparser_gc_clear, /* tp_clear */
3877 0, /* tp_richcompare */
3878 0, /* tp_weaklistoffset */
3879 0, /* tp_iter */
3880 0, /* tp_iternext */
3881 xmlparser_methods, /* tp_methods */
3882 0, /* tp_members */
3883 0, /* tp_getset */
3884 0, /* tp_base */
3885 0, /* tp_dict */
3886 0, /* tp_descr_get */
3887 0, /* tp_descr_set */
3888 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003889 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003890 PyType_GenericAlloc, /* tp_alloc */
3891 xmlparser_new, /* tp_new */
3892 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003893};
3894
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003895/* ==================================================================== */
3896/* python module interface */
3897
3898static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003899 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003900 {NULL, NULL}
3901};
3902
Martin v. Löwis1a214512008-06-11 05:26:20 +00003903
Eli Bendersky532d03e2013-08-10 08:00:39 -07003904static struct PyModuleDef elementtreemodule = {
3905 PyModuleDef_HEAD_INIT,
3906 "_elementtree",
3907 NULL,
3908 sizeof(elementtreestate),
3909 _functions,
3910 NULL,
3911 elementtree_traverse,
3912 elementtree_clear,
3913 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003914};
3915
Neal Norwitzf6657e62006-12-28 04:47:50 +00003916PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003917PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003918{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003919 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003920 elementtreestate *st;
3921
3922 m = PyState_FindModule(&elementtreemodule);
3923 if (m) {
3924 Py_INCREF(m);
3925 return m;
3926 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003927
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003928 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003929 if (PyType_Ready(&ElementIter_Type) < 0)
3930 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003931 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003932 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003933 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003934 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003935 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003936 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003937
Eli Bendersky532d03e2013-08-10 08:00:39 -07003938 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003939 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003940 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003941 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003942
Eli Bendersky828efde2012-04-05 05:40:58 +03003943 if (!(temp = PyImport_ImportModule("copy")))
3944 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003945 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003946 Py_XDECREF(temp);
3947
Eli Bendersky532d03e2013-08-10 08:00:39 -07003948 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003949 return NULL;
3950
Eli Bendersky20d41742012-06-01 09:48:37 +03003951 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003952 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3953 if (expat_capi) {
3954 /* check that it's usable */
3955 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003956 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003957 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3958 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003959 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003960 PyErr_SetString(PyExc_ImportError,
3961 "pyexpat version is incompatible");
3962 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003963 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003964 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003965 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003966 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003967
Eli Bendersky532d03e2013-08-10 08:00:39 -07003968 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003969 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003970 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003971 Py_INCREF(st->parseerror_obj);
3972 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003973
Eli Bendersky092af1f2012-03-04 07:14:03 +02003974 Py_INCREF((PyObject *)&Element_Type);
3975 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3976
Eli Bendersky58d548d2012-05-29 15:45:16 +03003977 Py_INCREF((PyObject *)&TreeBuilder_Type);
3978 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3979
Eli Bendersky52467b12012-06-01 07:13:08 +03003980 Py_INCREF((PyObject *)&XMLParser_Type);
3981 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003982
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003983 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003984}