blob: 63639c72e7e1b812b297113dc46629cda4a1d39f [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Flip the condition to 1 to keep a running
   byte count in `memory` and print it on every ALLOC/RELEASE; in the
   default build both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(nbytes, what)\
do { memory += nbytes; printf("%8d - %s\n", memory, what); } while (0)
#define RELEASE(nbytes, what)\
do { memory -= nbytes; printf("%8d - %s\n", memory, what); } while (0)
#else
#define ALLOC(nbytes, what)
#define RELEASE(nbytes, what)
#endif
48
49/* compiler tweaks */
/* LOCAL(rettype) introduces a file-local helper returning `rettype`.
   With MSVC the helper is additionally marked __inline and __fastcall. */
#ifdef _MSC_VER
#define LOCAL(rettype) static __inline rettype __fastcall
#else
#define LOCAL(rettype) static rettype
#endif
55
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000056/* macros used to store 'join' flags in string object pointers. note
57 that all use of text and tail as object pointers must be wrapped in
58 JOIN_OBJ. see comments in the ElementObject definition for more
59 info. */
/* JOIN_OBJ: the object pointer with the low (flag) bit masked off. */
#define JOIN_OBJ(ptr) ((PyObject*) ((Py_uintptr_t) (ptr) & ~(Py_uintptr_t)1))
/* JOIN_GET: the value of the low (flag) bit of a tagged pointer. */
#define JOIN_GET(ptr) ((Py_uintptr_t) (ptr) & 1)
/* JOIN_SET: the untagged pointer OR'ed with `flag`. */
#define JOIN_SET(ptr, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(ptr)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
/* Types defined by this extension.  These are forward declarations only;
 * the type objects themselves are defined elsewhere in the file. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
81
82
/* Per-module state; PEP 3121.
 * All three slots hold object references that are visited by
 * elementtree_traverse() and released by elementtree_clear().
 */
typedef struct {
    PyObject *parseerror_obj;   /* NOTE(review): presumably the ParseError class - confirm at module init */
    PyObject *deepcopy_obj;     /* callable invoked as deepcopy_obj(object, memo) by deepcopy() */
    PyObject *elementpath_obj;  /* NOTE(review): presumably the ElementPath helper module - confirm at module init */
} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True when the object's type is exactly Element_Type (subclasses fail). */
#define Element_CheckExact(obj) (Py_TYPE(obj) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200399 Py_INCREF(tag);
Serhiy Storchakaec397562016-04-06 09:50:03 +0300400 Py_XSETREF(self_elem->tag, tag);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200401
402 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_DECREF(JOIN_OBJ(tmp));
406
407 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200409 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_DECREF(JOIN_OBJ(tmp));
411
412 return 0;
413}
414
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000415LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200416element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419 PyObject* *children;
420
421 /* make sure self->children can hold the given number of extra
422 elements. set an exception and return -1 if allocation failed */
423
Victor Stinner5f0af232013-07-11 23:01:36 +0200424 if (!self->extra) {
425 if (create_extra(self, NULL) < 0)
426 return -1;
427 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000428
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200429 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
431 if (size > self->extra->allocated) {
432 /* use Python 2.4's list growth strategy */
433 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000434 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100435 * which needs at least 4 bytes.
436 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000437 * be safe.
438 */
439 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200440 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
441 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000442 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000443 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100444 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 * false alarm always assume at least one child to be safe.
446 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000447 children = PyObject_Realloc(self->extra->children,
448 size * sizeof(PyObject*));
449 if (!children)
450 goto nomemory;
451 } else {
452 children = PyObject_Malloc(size * sizeof(PyObject*));
453 if (!children)
454 goto nomemory;
455 /* copy existing children from static area to malloc buffer */
456 memcpy(children, self->extra->children,
457 self->extra->length * sizeof(PyObject*));
458 }
459 self->extra->children = children;
460 self->extra->allocated = size;
461 }
462
463 return 0;
464
465 nomemory:
466 PyErr_NoMemory();
467 return -1;
468}
469
470LOCAL(int)
471element_add_subelement(ElementObject* self, PyObject* element)
472{
473 /* add a child element to a parent */
474
475 if (element_resize(self, 1) < 0)
476 return -1;
477
478 Py_INCREF(element);
479 self->extra->children[self->extra->length] = element;
480
481 self->extra->length++;
482
483 return 0;
484}
485
486LOCAL(PyObject*)
487element_get_attrib(ElementObject* self)
488{
489 /* return borrowed reference to attrib dictionary */
490 /* note: this function assumes that the extra section exists */
491
492 PyObject* res = self->extra->attrib;
493
494 if (res == Py_None) {
495 /* create missing dictionary */
496 res = PyDict_New();
497 if (!res)
498 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200499 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000500 self->extra->attrib = res;
501 }
502
503 return res;
504}
505
506LOCAL(PyObject*)
507element_get_text(ElementObject* self)
508{
509 /* return borrowed reference to text attribute */
510
511 PyObject* res = self->text;
512
513 if (JOIN_GET(res)) {
514 res = JOIN_OBJ(res);
515 if (PyList_CheckExact(res)) {
516 res = list_join(res);
517 if (!res)
518 return NULL;
519 self->text = res;
520 }
521 }
522
523 return res;
524}
525
526LOCAL(PyObject*)
527element_get_tail(ElementObject* self)
528{
529 /* return borrowed reference to text attribute */
530
531 PyObject* res = self->tail;
532
533 if (JOIN_GET(res)) {
534 res = JOIN_OBJ(res);
535 if (PyList_CheckExact(res)) {
536 res = list_join(res);
537 if (!res)
538 return NULL;
539 self->tail = res;
540 }
541 }
542
543 return res;
544}
545
546static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300547subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000548{
549 PyObject* elem;
550
551 ElementObject* parent;
552 PyObject* tag;
553 PyObject* attrib = NULL;
554 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
555 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800556 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000557 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559
Eli Bendersky737b1732012-05-29 06:02:56 +0300560 if (attrib) {
561 /* attrib passed as positional arg */
562 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000563 if (!attrib)
564 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300565 if (kwds) {
566 if (PyDict_Update(attrib, kwds) < 0) {
567 return NULL;
568 }
569 }
570 } else if (kwds) {
571 /* have keyword args */
572 attrib = get_attrib_from_keywords(kwds);
573 if (!attrib)
574 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000575 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300576 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 Py_INCREF(Py_None);
578 attrib = Py_None;
579 }
580
Eli Bendersky092af1f2012-03-04 07:14:03 +0200581 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000582 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200583 if (elem == NULL)
584 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000586 if (element_add_subelement(parent, elem) < 0) {
587 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000588 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000589 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590
591 return elem;
592}
593
Eli Bendersky0192ba32012-03-30 16:38:33 +0300594static int
595element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
596{
597 Py_VISIT(self->tag);
598 Py_VISIT(JOIN_OBJ(self->text));
599 Py_VISIT(JOIN_OBJ(self->tail));
600
601 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200602 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300603 Py_VISIT(self->extra->attrib);
604
605 for (i = 0; i < self->extra->length; ++i)
606 Py_VISIT(self->extra->children[i]);
607 }
608 return 0;
609}
610
611static int
612element_gc_clear(ElementObject *self)
613{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300614 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700615 _clear_joined_ptr(&self->text);
616 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300617
618 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300619 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 return 0;
623}
624
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000625static void
626element_dealloc(ElementObject* self)
627{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300628 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300629
630 if (self->weakreflist != NULL)
631 PyObject_ClearWeakRefs((PyObject *) self);
632
Eli Bendersky0192ba32012-03-30 16:38:33 +0300633 /* element_gc_clear clears all references and deallocates extra
634 */
635 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000636
637 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200638 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000639}
640
641/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000642
Serhiy Storchakacb985562015-05-04 15:32:48 +0300643/*[clinic input]
644_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000645
Serhiy Storchakacb985562015-05-04 15:32:48 +0300646 subelement: object(subclass_of='&Element_Type')
647 /
648
649[clinic start generated code]*/
650
651static PyObject *
652_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
653/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
654{
655 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000656 return NULL;
657
658 Py_RETURN_NONE;
659}
660
Serhiy Storchakacb985562015-05-04 15:32:48 +0300661/*[clinic input]
662_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
Serhiy Storchakacb985562015-05-04 15:32:48 +0300664[clinic start generated code]*/
665
666static PyObject *
667_elementtree_Element_clear_impl(ElementObject *self)
668/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
669{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300670 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
672 Py_INCREF(Py_None);
673 Py_DECREF(JOIN_OBJ(self->text));
674 self->text = Py_None;
675
676 Py_INCREF(Py_None);
677 Py_DECREF(JOIN_OBJ(self->tail));
678 self->tail = Py_None;
679
680 Py_RETURN_NONE;
681}
682
Serhiy Storchakacb985562015-05-04 15:32:48 +0300683/*[clinic input]
684_elementtree.Element.__copy__
685
686[clinic start generated code]*/
687
688static PyObject *
689_elementtree_Element___copy___impl(ElementObject *self)
690/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200692 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693 ElementObject* element;
694
Eli Bendersky092af1f2012-03-04 07:14:03 +0200695 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800696 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697 if (!element)
698 return NULL;
699
700 Py_DECREF(JOIN_OBJ(element->text));
701 element->text = self->text;
702 Py_INCREF(JOIN_OBJ(element->text));
703
704 Py_DECREF(JOIN_OBJ(element->tail));
705 element->tail = self->tail;
706 Py_INCREF(JOIN_OBJ(element->tail));
707
708 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000709 if (element_resize(element, self->extra->length) < 0) {
710 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000711 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000712 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713
714 for (i = 0; i < self->extra->length; i++) {
715 Py_INCREF(self->extra->children[i]);
716 element->extra->children[i] = self->extra->children[i];
717 }
718
719 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000720 }
721
722 return (PyObject*) element;
723}
724
/* Helper for a deep copy.  Forward declaration; takes the object to copy
 * and the memo dictionary. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
727
Serhiy Storchakacb985562015-05-04 15:32:48 +0300728/*[clinic input]
729_elementtree.Element.__deepcopy__
730
731 memo: object
732 /
733
734[clinic start generated code]*/
735
736static PyObject *
737_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
738/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200740 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741 ElementObject* element;
742 PyObject* tag;
743 PyObject* attrib;
744 PyObject* text;
745 PyObject* tail;
746 PyObject* id;
747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 tag = deepcopy(self->tag, memo);
749 if (!tag)
750 return NULL;
751
752 if (self->extra) {
753 attrib = deepcopy(self->extra->attrib, memo);
754 if (!attrib) {
755 Py_DECREF(tag);
756 return NULL;
757 }
758 } else {
759 Py_INCREF(Py_None);
760 attrib = Py_None;
761 }
762
Eli Bendersky092af1f2012-03-04 07:14:03 +0200763 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764
765 Py_DECREF(tag);
766 Py_DECREF(attrib);
767
768 if (!element)
769 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 text = deepcopy(JOIN_OBJ(self->text), memo);
772 if (!text)
773 goto error;
774 Py_DECREF(element->text);
775 element->text = JOIN_SET(text, JOIN_GET(self->text));
776
777 tail = deepcopy(JOIN_OBJ(self->tail), memo);
778 if (!tail)
779 goto error;
780 Py_DECREF(element->tail);
781 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
782
783 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000784 if (element_resize(element, self->extra->length) < 0)
785 goto error;
786
787 for (i = 0; i < self->extra->length; i++) {
788 PyObject* child = deepcopy(self->extra->children[i], memo);
789 if (!child) {
790 element->extra->length = i;
791 goto error;
792 }
793 element->extra->children[i] = child;
794 }
795
796 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000797 }
798
799 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200800 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000801 if (!id)
802 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000803
804 i = PyDict_SetItem(memo, id, (PyObject*) element);
805
806 Py_DECREF(id);
807
808 if (i < 0)
809 goto error;
810
811 return (PyObject*) element;
812
813 error:
814 Py_DECREF(element);
815 return NULL;
816}
817
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200818LOCAL(PyObject *)
819deepcopy(PyObject *object, PyObject *memo)
820{
821 /* do a deep copy of the given object */
822 PyObject *args;
823 PyObject *result;
824 elementtreestate *st;
825
826 /* Fast paths */
827 if (object == Py_None || PyUnicode_CheckExact(object)) {
828 Py_INCREF(object);
829 return object;
830 }
831
832 if (Py_REFCNT(object) == 1) {
833 if (PyDict_CheckExact(object)) {
834 PyObject *key, *value;
835 Py_ssize_t pos = 0;
836 int simple = 1;
837 while (PyDict_Next(object, &pos, &key, &value)) {
838 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
839 simple = 0;
840 break;
841 }
842 }
843 if (simple)
844 return PyDict_Copy(object);
845 /* Fall through to general case */
846 }
847 else if (Element_CheckExact(object)) {
848 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
849 }
850 }
851
852 /* General case */
853 st = ET_STATE_GLOBAL;
854 if (!st->deepcopy_obj) {
855 PyErr_SetString(PyExc_RuntimeError,
856 "deepcopy helper not found");
857 return NULL;
858 }
859
860 args = PyTuple_Pack(2, object, memo);
861 if (!args)
862 return NULL;
863 result = PyObject_CallObject(st->deepcopy_obj, args);
864 Py_DECREF(args);
865 return result;
866}
867
868
Serhiy Storchakacb985562015-05-04 15:32:48 +0300869/*[clinic input]
870_elementtree.Element.__sizeof__ -> Py_ssize_t
871
872[clinic start generated code]*/
873
874static Py_ssize_t
875_elementtree_Element___sizeof___impl(ElementObject *self)
876/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200877{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200878 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879 if (self->extra) {
880 result += sizeof(ElementObjectExtra);
881 if (self->extra->children != self->extra->_children)
882 result += sizeof(PyObject*) * self->extra->allocated;
883 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300884 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200885}
886
Eli Bendersky698bdb22013-01-10 06:01:06 -0800887/* dict keys for getstate/setstate. */
888#define PICKLED_TAG "tag"
889#define PICKLED_CHILDREN "_children"
890#define PICKLED_ATTRIB "attrib"
891#define PICKLED_TAIL "tail"
892#define PICKLED_TEXT "text"
893
894/* __getstate__ returns a fabricated instance dict as in the pure-Python
895 * Element implementation, for interoperability/interchangeability. This
896 * makes the pure-Python implementation details an API, but (a) there aren't
897 * any unnecessary structures there; and (b) it buys compatibility with 3.2
898 * pickles. See issue #16076.
899 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300900/*[clinic input]
901_elementtree.Element.__getstate__
902
903[clinic start generated code]*/
904
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300906_elementtree_Element___getstate___impl(ElementObject *self)
907/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800908{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200909 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910 PyObject *instancedict = NULL, *children;
911
912 /* Build a list of children. */
913 children = PyList_New(self->extra ? self->extra->length : 0);
914 if (!children)
915 return NULL;
916 for (i = 0; i < PyList_GET_SIZE(children); i++) {
917 PyObject *child = self->extra->children[i];
918 Py_INCREF(child);
919 PyList_SET_ITEM(children, i, child);
920 }
921
922 /* Construct the state object. */
923 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
924 if (noattrib)
925 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
926 PICKLED_TAG, self->tag,
927 PICKLED_CHILDREN, children,
928 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700929 PICKLED_TEXT, JOIN_OBJ(self->text),
930 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800931 else
932 instancedict = Py_BuildValue("{sOsOsOsOsO}",
933 PICKLED_TAG, self->tag,
934 PICKLED_CHILDREN, children,
935 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700936 PICKLED_TEXT, JOIN_OBJ(self->text),
937 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800938 if (instancedict) {
939 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800941 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 else {
943 for (i = 0; i < PyList_GET_SIZE(children); i++)
944 Py_DECREF(PyList_GET_ITEM(children, i));
945 Py_DECREF(children);
946
947 return NULL;
948 }
949}
950
951static PyObject *
952element_setstate_from_attributes(ElementObject *self,
953 PyObject *tag,
954 PyObject *attrib,
955 PyObject *text,
956 PyObject *tail,
957 PyObject *children)
958{
959 Py_ssize_t i, nchildren;
960
961 if (!tag) {
962 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
963 return NULL;
964 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800965
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200966 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300967 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800968
Eli Benderskydd3661e2013-09-13 06:24:25 -0700969 _clear_joined_ptr(&self->text);
970 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
971 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800972
Eli Benderskydd3661e2013-09-13 06:24:25 -0700973 _clear_joined_ptr(&self->tail);
974 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
975 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800976
977 /* Handle ATTRIB and CHILDREN. */
978 if (!children && !attrib)
979 Py_RETURN_NONE;
980
981 /* Compute 'nchildren'. */
982 if (children) {
983 if (!PyList_Check(children)) {
984 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
985 return NULL;
986 }
987 nchildren = PyList_Size(children);
988 }
989 else {
990 nchildren = 0;
991 }
992
993 /* Allocate 'extra'. */
994 if (element_resize(self, nchildren)) {
995 return NULL;
996 }
997 assert(self->extra && self->extra->allocated >= nchildren);
998
999 /* Copy children */
1000 for (i = 0; i < nchildren; i++) {
1001 self->extra->children[i] = PyList_GET_ITEM(children, i);
1002 Py_INCREF(self->extra->children[i]);
1003 }
1004
1005 self->extra->length = nchildren;
1006 self->extra->allocated = nchildren;
1007
1008 /* Stash attrib. */
1009 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001010 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +03001011 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 }
1013
1014 Py_RETURN_NONE;
1015}
1016
1017/* __setstate__ for Element instance from the Python implementation.
1018 * 'state' should be the instance dict.
1019 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020
Eli Bendersky698bdb22013-01-10 06:01:06 -08001021static PyObject *
1022element_setstate_from_Python(ElementObject *self, PyObject *state)
1023{
1024 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1025 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1026 PyObject *args;
1027 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001028 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029
Eli Bendersky698bdb22013-01-10 06:01:06 -08001030 tag = attrib = text = tail = children = NULL;
1031 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034
1035 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1036 &attrib, &text, &tail, &children))
1037 retval = element_setstate_from_attributes(self, tag, attrib, text,
1038 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001039 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001040 retval = NULL;
1041
1042 Py_DECREF(args);
1043 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001044}
1045
Serhiy Storchakacb985562015-05-04 15:32:48 +03001046/*[clinic input]
1047_elementtree.Element.__setstate__
1048
1049 state: object
1050 /
1051
1052[clinic start generated code]*/
1053
Eli Bendersky698bdb22013-01-10 06:01:06 -08001054static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001055_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1056/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001057{
1058 if (!PyDict_CheckExact(state)) {
1059 PyErr_Format(PyExc_TypeError,
1060 "Don't know how to unpickle \"%.200R\" as an Element",
1061 state);
1062 return NULL;
1063 }
1064 else
1065 return element_setstate_from_Python(self, state);
1066}
1067
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001068LOCAL(int)
1069checkpath(PyObject* tag)
1070{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 Py_ssize_t i;
1072 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001073
1074 /* check if a tag contains an xpath character */
1075
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001076#define PATHCHAR(ch) \
1077 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001078
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001079 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1081 void *data = PyUnicode_DATA(tag);
1082 unsigned int kind = PyUnicode_KIND(tag);
1083 for (i = 0; i < len; i++) {
1084 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1085 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001086 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 return 1;
1091 }
1092 return 0;
1093 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001094 if (PyBytes_Check(tag)) {
1095 char *p = PyBytes_AS_STRING(tag);
1096 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001097 if (p[i] == '{')
1098 check = 0;
1099 else if (p[i] == '}')
1100 check = 1;
1101 else if (check && PATHCHAR(p[i]))
1102 return 1;
1103 }
1104 return 0;
1105 }
1106
1107 return 1; /* unknown type; might be path expression */
1108}
1109
Serhiy Storchakacb985562015-05-04 15:32:48 +03001110/*[clinic input]
1111_elementtree.Element.extend
1112
1113 elements: object
1114 /
1115
1116[clinic start generated code]*/
1117
1118static PyObject *
1119_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1120/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121{
1122 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124
Serhiy Storchakacb985562015-05-04 15:32:48 +03001125 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126 if (!seq) {
1127 PyErr_Format(
1128 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 );
1131 return NULL;
1132 }
1133
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001134 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001135 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 Py_INCREF(element);
1137 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001138 PyErr_Format(
1139 PyExc_TypeError,
1140 "expected an Element, not \"%.200s\"",
1141 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001142 Py_DECREF(seq);
1143 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001144 return NULL;
1145 }
1146
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001147 if (element_add_subelement(self, element) < 0) {
1148 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001149 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001150 return NULL;
1151 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001152 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001153 }
1154
1155 Py_DECREF(seq);
1156
1157 Py_RETURN_NONE;
1158}
1159
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160/*[clinic input]
1161_elementtree.Element.find
1162
1163 path: object
1164 namespaces: object = None
1165
1166[clinic start generated code]*/
1167
1168static PyObject *
1169_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1170 PyObject *namespaces)
1171/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001173 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001174 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001175
Serhiy Storchakacb985562015-05-04 15:32:48 +03001176 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001178 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001179 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001180 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001181 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182
1183 if (!self->extra)
1184 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001185
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186 for (i = 0; i < self->extra->length; i++) {
1187 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001188 int rc;
1189 if (!Element_CheckExact(item))
1190 continue;
1191 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001192 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001193 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001194 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 Py_DECREF(item);
1196 if (rc < 0)
1197 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 }
1199
1200 Py_RETURN_NONE;
1201}
1202
Serhiy Storchakacb985562015-05-04 15:32:48 +03001203/*[clinic input]
1204_elementtree.Element.findtext
1205
1206 path: object
1207 default: object = None
1208 namespaces: object = None
1209
1210[clinic start generated code]*/
1211
1212static PyObject *
1213_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1214 PyObject *default_value,
1215 PyObject *namespaces)
1216/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001217{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001218 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001219 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001220 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001221
Serhiy Storchakacb985562015-05-04 15:32:48 +03001222 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001223 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001224 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001225 );
1226
1227 if (!self->extra) {
1228 Py_INCREF(default_value);
1229 return default_value;
1230 }
1231
1232 for (i = 0; i < self->extra->length; i++) {
1233 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001234 int rc;
1235 if (!Element_CheckExact(item))
1236 continue;
1237 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001238 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001239 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001240 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 if (text == Py_None) {
1242 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001243 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001244 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001245 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001247 return text;
1248 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001249 Py_DECREF(item);
1250 if (rc < 0)
1251 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 }
1253
1254 Py_INCREF(default_value);
1255 return default_value;
1256}
1257
Serhiy Storchakacb985562015-05-04 15:32:48 +03001258/*[clinic input]
1259_elementtree.Element.findall
1260
1261 path: object
1262 namespaces: object = None
1263
1264[clinic start generated code]*/
1265
1266static PyObject *
1267_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1268 PyObject *namespaces)
1269/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001270{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001271 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001273 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001274 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001275
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001276 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001277 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001279 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001280 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001281 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282
1283 out = PyList_New(0);
1284 if (!out)
1285 return NULL;
1286
1287 if (!self->extra)
1288 return out;
1289
1290 for (i = 0; i < self->extra->length; i++) {
1291 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001292 int rc;
1293 if (!Element_CheckExact(item))
1294 continue;
1295 Py_INCREF(item);
1296 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1297 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1298 Py_DECREF(item);
1299 Py_DECREF(out);
1300 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001301 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001302 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
1304
1305 return out;
1306}
1307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308/*[clinic input]
1309_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001310
Serhiy Storchakacb985562015-05-04 15:32:48 +03001311 path: object
1312 namespaces: object = None
1313
1314[clinic start generated code]*/
1315
1316static PyObject *
1317_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1318 PyObject *namespaces)
1319/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1320{
1321 PyObject* tag = path;
1322 _Py_IDENTIFIER(iterfind);
1323 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001325 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001326 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001327}
1328
Serhiy Storchakacb985562015-05-04 15:32:48 +03001329/*[clinic input]
1330_elementtree.Element.get
1331
1332 key: object
1333 default: object = None
1334
1335[clinic start generated code]*/
1336
1337static PyObject *
1338_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1339 PyObject *default_value)
1340/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001341{
1342 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343
1344 if (!self->extra || self->extra->attrib == Py_None)
1345 value = default_value;
1346 else {
1347 value = PyDict_GetItem(self->extra->attrib, key);
1348 if (!value)
1349 value = default_value;
1350 }
1351
1352 Py_INCREF(value);
1353 return value;
1354}
1355
Serhiy Storchakacb985562015-05-04 15:32:48 +03001356/*[clinic input]
1357_elementtree.Element.getchildren
1358
1359[clinic start generated code]*/
1360
1361static PyObject *
1362_elementtree_Element_getchildren_impl(ElementObject *self)
1363/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001364{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001365 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366 PyObject* list;
1367
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001368 /* FIXME: report as deprecated? */
1369
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 if (!self->extra)
1371 return PyList_New(0);
1372
1373 list = PyList_New(self->extra->length);
1374 if (!list)
1375 return NULL;
1376
1377 for (i = 0; i < self->extra->length; i++) {
1378 PyObject* item = self->extra->children[i];
1379 Py_INCREF(item);
1380 PyList_SET_ITEM(list, i, item);
1381 }
1382
1383 return list;
1384}
1385
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001386
Eli Bendersky64d11e62012-06-15 07:42:50 +03001387static PyObject *
1388create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1389
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.iter
1393
1394 tag: object = None
1395
1396[clinic start generated code]*/
1397
Eli Bendersky64d11e62012-06-15 07:42:50 +03001398static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001399_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1400/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001401{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001402 if (PyUnicode_Check(tag)) {
1403 if (PyUnicode_READY(tag) < 0)
1404 return NULL;
1405 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1406 tag = Py_None;
1407 }
1408 else if (PyBytes_Check(tag)) {
1409 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1410 tag = Py_None;
1411 }
1412
Eli Bendersky64d11e62012-06-15 07:42:50 +03001413 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001414}
1415
1416
Serhiy Storchakacb985562015-05-04 15:32:48 +03001417/*[clinic input]
1418_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001419
Serhiy Storchakacb985562015-05-04 15:32:48 +03001420[clinic start generated code]*/
1421
1422static PyObject *
1423_elementtree_Element_itertext_impl(ElementObject *self)
1424/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1425{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001426 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001427}
1428
Eli Bendersky64d11e62012-06-15 07:42:50 +03001429
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001431element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001433 ElementObject* self = (ElementObject*) self_;
1434
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001435 if (!self->extra || index < 0 || index >= self->extra->length) {
1436 PyErr_SetString(
1437 PyExc_IndexError,
1438 "child index out of range"
1439 );
1440 return NULL;
1441 }
1442
1443 Py_INCREF(self->extra->children[index]);
1444 return self->extra->children[index];
1445}
1446
Serhiy Storchakacb985562015-05-04 15:32:48 +03001447/*[clinic input]
1448_elementtree.Element.insert
1449
1450 index: Py_ssize_t
1451 subelement: object(subclass_of='&Element_Type')
1452 /
1453
1454[clinic start generated code]*/
1455
1456static PyObject *
1457_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1458 PyObject *subelement)
1459/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001460{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001461 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462
Victor Stinner5f0af232013-07-11 23:01:36 +02001463 if (!self->extra) {
1464 if (create_extra(self, NULL) < 0)
1465 return NULL;
1466 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001467
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001468 if (index < 0) {
1469 index += self->extra->length;
1470 if (index < 0)
1471 index = 0;
1472 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001473 if (index > self->extra->length)
1474 index = self->extra->length;
1475
1476 if (element_resize(self, 1) < 0)
1477 return NULL;
1478
1479 for (i = self->extra->length; i > index; i--)
1480 self->extra->children[i] = self->extra->children[i-1];
1481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482 Py_INCREF(subelement);
1483 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484
1485 self->extra->length++;
1486
1487 Py_RETURN_NONE;
1488}
1489
Serhiy Storchakacb985562015-05-04 15:32:48 +03001490/*[clinic input]
1491_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001492
Serhiy Storchakacb985562015-05-04 15:32:48 +03001493[clinic start generated code]*/
1494
1495static PyObject *
1496_elementtree_Element_items_impl(ElementObject *self)
1497/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1498{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001499 if (!self->extra || self->extra->attrib == Py_None)
1500 return PyList_New(0);
1501
1502 return PyDict_Items(self->extra->attrib);
1503}
1504
Serhiy Storchakacb985562015-05-04 15:32:48 +03001505/*[clinic input]
1506_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001507
Serhiy Storchakacb985562015-05-04 15:32:48 +03001508[clinic start generated code]*/
1509
1510static PyObject *
1511_elementtree_Element_keys_impl(ElementObject *self)
1512/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1513{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514 if (!self->extra || self->extra->attrib == Py_None)
1515 return PyList_New(0);
1516
1517 return PyDict_Keys(self->extra->attrib);
1518}
1519
Martin v. Löwis18e16552006-02-15 17:27:45 +00001520static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001521element_length(ElementObject* self)
1522{
1523 if (!self->extra)
1524 return 0;
1525
1526 return self->extra->length;
1527}
1528
Serhiy Storchakacb985562015-05-04 15:32:48 +03001529/*[clinic input]
1530_elementtree.Element.makeelement
1531
1532 tag: object
1533 attrib: object
1534 /
1535
1536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1540 PyObject *attrib)
1541/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001542{
1543 PyObject* elem;
1544
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545 attrib = PyDict_Copy(attrib);
1546 if (!attrib)
1547 return NULL;
1548
Eli Bendersky092af1f2012-03-04 07:14:03 +02001549 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001550
1551 Py_DECREF(attrib);
1552
1553 return elem;
1554}
1555
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556/*[clinic input]
1557_elementtree.Element.remove
1558
1559 subelement: object(subclass_of='&Element_Type')
1560 /
1561
1562[clinic start generated code]*/
1563
1564static PyObject *
1565_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1566/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001568 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001569 int rc;
1570 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001572 if (!self->extra) {
1573 /* element has no children, so raise exception */
1574 PyErr_SetString(
1575 PyExc_ValueError,
1576 "list.remove(x): x not in list"
1577 );
1578 return NULL;
1579 }
1580
1581 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001582 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001583 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001584 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001585 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001586 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001587 if (rc < 0)
1588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589 }
1590
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001591 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001592 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593 PyErr_SetString(
1594 PyExc_ValueError,
1595 "list.remove(x): x not in list"
1596 );
1597 return NULL;
1598 }
1599
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001600 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001601
1602 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603 for (; i < self->extra->length; i++)
1604 self->extra->children[i] = self->extra->children[i+1];
1605
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001606 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 Py_RETURN_NONE;
1608}
1609
1610static PyObject*
1611element_repr(ElementObject* self)
1612{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001613 if (self->tag)
1614 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1615 else
1616 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617}
1618
Serhiy Storchakacb985562015-05-04 15:32:48 +03001619/*[clinic input]
1620_elementtree.Element.set
1621
1622 key: object
1623 value: object
1624 /
1625
1626[clinic start generated code]*/
1627
1628static PyObject *
1629_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1630 PyObject *value)
1631/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001632{
1633 PyObject* attrib;
1634
Victor Stinner5f0af232013-07-11 23:01:36 +02001635 if (!self->extra) {
1636 if (create_extra(self, NULL) < 0)
1637 return NULL;
1638 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639
1640 attrib = element_get_attrib(self);
1641 if (!attrib)
1642 return NULL;
1643
1644 if (PyDict_SetItem(attrib, key, value) < 0)
1645 return NULL;
1646
1647 Py_RETURN_NONE;
1648}
1649
1650static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001651element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001652{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001653 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001654 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001655 PyObject* old;
1656
1657 if (!self->extra || index < 0 || index >= self->extra->length) {
1658 PyErr_SetString(
1659 PyExc_IndexError,
1660 "child assignment index out of range");
1661 return -1;
1662 }
1663
1664 old = self->extra->children[index];
1665
1666 if (item) {
1667 Py_INCREF(item);
1668 self->extra->children[index] = item;
1669 } else {
1670 self->extra->length--;
1671 for (i = index; i < self->extra->length; i++)
1672 self->extra->children[i] = self->extra->children[i+1];
1673 }
1674
1675 Py_DECREF(old);
1676
1677 return 0;
1678}
1679
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001680static PyObject*
1681element_subscr(PyObject* self_, PyObject* item)
1682{
1683 ElementObject* self = (ElementObject*) self_;
1684
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001685 if (PyIndex_Check(item)) {
1686 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001687
1688 if (i == -1 && PyErr_Occurred()) {
1689 return NULL;
1690 }
1691 if (i < 0 && self->extra)
1692 i += self->extra->length;
1693 return element_getitem(self_, i);
1694 }
1695 else if (PySlice_Check(item)) {
1696 Py_ssize_t start, stop, step, slicelen, cur, i;
1697 PyObject* list;
1698
1699 if (!self->extra)
1700 return PyList_New(0);
1701
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001702 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001703 self->extra->length,
1704 &start, &stop, &step, &slicelen) < 0) {
1705 return NULL;
1706 }
1707
1708 if (slicelen <= 0)
1709 return PyList_New(0);
1710 else {
1711 list = PyList_New(slicelen);
1712 if (!list)
1713 return NULL;
1714
1715 for (cur = start, i = 0; i < slicelen;
1716 cur += step, i++) {
1717 PyObject* item = self->extra->children[cur];
1718 Py_INCREF(item);
1719 PyList_SET_ITEM(list, i, item);
1720 }
1721
1722 return list;
1723 }
1724 }
1725 else {
1726 PyErr_SetString(PyExc_TypeError,
1727 "element indices must be integers");
1728 return NULL;
1729 }
1730}
1731
1732static int
1733element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1734{
1735 ElementObject* self = (ElementObject*) self_;
1736
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001737 if (PyIndex_Check(item)) {
1738 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001739
1740 if (i == -1 && PyErr_Occurred()) {
1741 return -1;
1742 }
1743 if (i < 0 && self->extra)
1744 i += self->extra->length;
1745 return element_setitem(self_, i, value);
1746 }
1747 else if (PySlice_Check(item)) {
1748 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1749
1750 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001751 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001752
Victor Stinner5f0af232013-07-11 23:01:36 +02001753 if (!self->extra) {
1754 if (create_extra(self, NULL) < 0)
1755 return -1;
1756 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001757
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001758 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001759 self->extra->length,
1760 &start, &stop, &step, &slicelen) < 0) {
1761 return -1;
1762 }
1763
Eli Bendersky865756a2012-03-09 13:38:15 +02001764 if (value == NULL) {
1765 /* Delete slice */
1766 size_t cur;
1767 Py_ssize_t i;
1768
1769 if (slicelen <= 0)
1770 return 0;
1771
1772 /* Since we're deleting, the direction of the range doesn't matter,
1773 * so for simplicity make it always ascending.
1774 */
1775 if (step < 0) {
1776 stop = start + 1;
1777 start = stop + step * (slicelen - 1) - 1;
1778 step = -step;
1779 }
1780
1781 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1782
1783 /* recycle is a list that will contain all the children
1784 * scheduled for removal.
1785 */
1786 if (!(recycle = PyList_New(slicelen))) {
1787 PyErr_NoMemory();
1788 return -1;
1789 }
1790
1791 /* This loop walks over all the children that have to be deleted,
1792 * with cur pointing at them. num_moved is the amount of children
1793 * until the next deleted child that have to be "shifted down" to
1794 * occupy the deleted's places.
1795 * Note that in the ith iteration, shifting is done i+i places down
1796 * because i children were already removed.
1797 */
1798 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1799 /* Compute how many children have to be moved, clipping at the
1800 * list end.
1801 */
1802 Py_ssize_t num_moved = step - 1;
1803 if (cur + step >= (size_t)self->extra->length) {
1804 num_moved = self->extra->length - cur - 1;
1805 }
1806
1807 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1808
1809 memmove(
1810 self->extra->children + cur - i,
1811 self->extra->children + cur + 1,
1812 num_moved * sizeof(PyObject *));
1813 }
1814
1815 /* Leftover "tail" after the last removed child */
1816 cur = start + (size_t)slicelen * step;
1817 if (cur < (size_t)self->extra->length) {
1818 memmove(
1819 self->extra->children + cur - slicelen,
1820 self->extra->children + cur,
1821 (self->extra->length - cur) * sizeof(PyObject *));
1822 }
1823
1824 self->extra->length -= slicelen;
1825
1826 /* Discard the recycle list with all the deleted sub-elements */
1827 Py_XDECREF(recycle);
1828 return 0;
1829 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001830
1831 /* A new slice is actually being assigned */
1832 seq = PySequence_Fast(value, "");
1833 if (!seq) {
1834 PyErr_Format(
1835 PyExc_TypeError,
1836 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1837 );
1838 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001839 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001840 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841
1842 if (step != 1 && newlen != slicelen)
1843 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001844 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001845 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001846 "attempt to assign sequence of size %zd "
1847 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001848 newlen, slicelen
1849 );
1850 return -1;
1851 }
1852
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853 /* Resize before creating the recycle bin, to prevent refleaks. */
1854 if (newlen > slicelen) {
1855 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001856 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001857 return -1;
1858 }
1859 }
1860
1861 if (slicelen > 0) {
1862 /* to avoid recursive calls to this method (via decref), move
1863 old items to the recycle bin here, and get rid of them when
1864 we're done modifying the element */
1865 recycle = PyList_New(slicelen);
1866 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001867 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001868 return -1;
1869 }
1870 for (cur = start, i = 0; i < slicelen;
1871 cur += step, i++)
1872 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1873 }
1874
1875 if (newlen < slicelen) {
1876 /* delete slice */
1877 for (i = stop; i < self->extra->length; i++)
1878 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1879 } else if (newlen > slicelen) {
1880 /* insert slice */
1881 for (i = self->extra->length-1; i >= stop; i--)
1882 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1883 }
1884
1885 /* replace the slice */
1886 for (cur = start, i = 0; i < newlen;
1887 cur += step, i++) {
1888 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1889 Py_INCREF(element);
1890 self->extra->children[cur] = element;
1891 }
1892
1893 self->extra->length += newlen - slicelen;
1894
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001895 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896
1897 /* discard the recycle bin, and everything in it */
1898 Py_XDECREF(recycle);
1899
1900 return 0;
1901 }
1902 else {
1903 PyErr_SetString(PyExc_TypeError,
1904 "element indices must be integers");
1905 return -1;
1906 }
1907}
1908
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001909static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001910element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001911{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001912 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001913 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001914 return res;
1915}
1916
Serhiy Storchakadde08152015-11-25 15:28:13 +02001917static PyObject*
1918element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001919{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001920 PyObject *res = element_get_text(self);
1921 Py_XINCREF(res);
1922 return res;
1923}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001924
Serhiy Storchakadde08152015-11-25 15:28:13 +02001925static PyObject*
1926element_tail_getter(ElementObject *self, void *closure)
1927{
1928 PyObject *res = element_get_tail(self);
1929 Py_XINCREF(res);
1930 return res;
1931}
1932
1933static PyObject*
1934element_attrib_getter(ElementObject *self, void *closure)
1935{
1936 PyObject *res;
1937 if (!self->extra) {
1938 if (create_extra(self, NULL) < 0)
1939 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001940 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001941 res = element_get_attrib(self);
1942 Py_XINCREF(res);
1943 return res;
1944}
Victor Stinner4d463432013-07-11 23:05:03 +02001945
/* macro for setter validation: deleting an attribute (a NULL value) is
   refused with AttributeError and makes the enclosing setter return -1.
   Wrapped in do { } while (0) so that it expands to exactly one statement
   and cannot capture a following `else`. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1954
Serhiy Storchakadde08152015-11-25 15:28:13 +02001955static int
1956element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1957{
1958 _VALIDATE_ATTR_VALUE(value);
1959 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001960 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001961 return 0;
1962}
1963
1964static int
1965element_text_setter(ElementObject *self, PyObject *value, void *closure)
1966{
1967 _VALIDATE_ATTR_VALUE(value);
1968 Py_INCREF(value);
1969 Py_DECREF(JOIN_OBJ(self->text));
1970 self->text = value;
1971 return 0;
1972}
1973
1974static int
1975element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1976{
1977 _VALIDATE_ATTR_VALUE(value);
1978 Py_INCREF(value);
1979 Py_DECREF(JOIN_OBJ(self->tail));
1980 self->tail = value;
1981 return 0;
1982}
1983
1984static int
1985element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1986{
1987 _VALIDATE_ATTR_VALUE(value);
1988 if (!self->extra) {
1989 if (create_extra(self, NULL) < 0)
1990 return -1;
1991 }
1992 Py_INCREF(value);
Serhiy Storchakaf01e4082016-04-10 18:12:01 +03001993 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07001994 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001995}
1996
1997static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001998 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001999 0, /* sq_concat */
2000 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002001 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002002 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002003 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002004 0,
2005};
2006
Eli Bendersky64d11e62012-06-15 07:42:50 +03002007/******************************* Element iterator ****************************/
2008
2009/* ElementIterObject represents the iteration state over an XML element in
2010 * pre-order traversal. To keep track of which sub-element should be returned
2011 * next, a stack of parents is maintained. This is a standard stack-based
2012 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002013 * The stack is managed using a continuous array.
2014 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002015 * the current one is exhausted, and the next child to examine in that parent.
2016 */
2017typedef struct ParentLocator_t {
2018 ElementObject *parent;
2019 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002020} ParentLocator;
2021
2022typedef struct {
2023 PyObject_HEAD
2024 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002025 Py_ssize_t parent_stack_used;
2026 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002027 ElementObject *root_element;
2028 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002029 int gettext;
2030} ElementIterObject;
2031
2032
2033static void
2034elementiter_dealloc(ElementIterObject *it)
2035{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002036 Py_ssize_t i = it->parent_stack_used;
2037 it->parent_stack_used = 0;
2038 while (i--)
2039 Py_XDECREF(it->parent_stack[i].parent);
2040 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002041
2042 Py_XDECREF(it->sought_tag);
2043 Py_XDECREF(it->root_element);
2044
2045 PyObject_GC_UnTrack(it);
2046 PyObject_GC_Del(it);
2047}
2048
2049static int
2050elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2051{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002052 Py_ssize_t i = it->parent_stack_used;
2053 while (i--)
2054 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002055
2056 Py_VISIT(it->root_element);
2057 Py_VISIT(it->sought_tag);
2058 return 0;
2059}
2060
2061/* Helper function for elementiter_next. Add a new parent to the parent stack.
2062 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002063static int
2064parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002065{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002066 ParentLocator *item;
2067
2068 if (it->parent_stack_used >= it->parent_stack_size) {
2069 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2070 ParentLocator *parent_stack = it->parent_stack;
2071 PyMem_Resize(parent_stack, ParentLocator, new_size);
2072 if (parent_stack == NULL)
2073 return -1;
2074 it->parent_stack = parent_stack;
2075 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002076 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002077 item = it->parent_stack + it->parent_stack_used++;
2078 Py_INCREF(parent);
2079 item->parent = parent;
2080 item->child_index = 0;
2081 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082}
2083
2084static PyObject *
2085elementiter_next(ElementIterObject *it)
2086{
2087 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002088 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002089 * A short note on gettext: this function serves both the iter() and
2090 * itertext() methods to avoid code duplication. However, there are a few
2091 * small differences in the way these iterations work. Namely:
2092 * - itertext() only yields text from nodes that have it, and continues
2093 * iterating when a node doesn't have text (so it doesn't return any
2094 * node like iter())
2095 * - itertext() also has to handle tail, after finishing with all the
2096 * children of a node.
2097 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002098 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002099 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002100 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101
2102 while (1) {
2103 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002104 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002105 * iterator is exhausted.
2106 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002107 if (!it->parent_stack_used) {
2108 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002109 PyErr_SetNone(PyExc_StopIteration);
2110 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111 }
2112
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002113 elem = it->root_element; /* steals a reference */
2114 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115 }
2116 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002117 /* See if there are children left to traverse in the current parent. If
2118 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002120 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2121 Py_ssize_t child_index = item->child_index;
2122 ElementObjectExtra *extra;
2123 elem = item->parent;
2124 extra = elem->extra;
2125 if (!extra || child_index >= extra->length) {
2126 it->parent_stack_used--;
2127 /* Note that extra condition on it->parent_stack_used here;
2128 * this is because itertext() is supposed to only return *inner*
2129 * text, not text following the element it began iteration with.
2130 */
2131 if (it->gettext && it->parent_stack_used) {
2132 text = element_get_tail(elem);
2133 goto gettext;
2134 }
2135 Py_DECREF(elem);
2136 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002137 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002138
2139 elem = (ElementObject *)extra->children[child_index];
2140 item->child_index++;
2141 Py_INCREF(elem);
2142 }
2143
2144 if (parent_stack_push_new(it, elem) < 0) {
2145 Py_DECREF(elem);
2146 PyErr_NoMemory();
2147 return NULL;
2148 }
2149 if (it->gettext) {
2150 text = element_get_text(elem);
2151 goto gettext;
2152 }
2153
2154 if (it->sought_tag == Py_None)
2155 return (PyObject *)elem;
2156
2157 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2158 if (rc > 0)
2159 return (PyObject *)elem;
2160
2161 Py_DECREF(elem);
2162 if (rc < 0)
2163 return NULL;
2164 continue;
2165
2166gettext:
2167 if (!text) {
2168 Py_DECREF(elem);
2169 return NULL;
2170 }
2171 if (text == Py_None) {
2172 Py_DECREF(elem);
2173 }
2174 else {
2175 Py_INCREF(text);
2176 Py_DECREF(elem);
2177 rc = PyObject_IsTrue(text);
2178 if (rc > 0)
2179 return text;
2180 Py_DECREF(text);
2181 if (rc < 0)
2182 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002183 }
2184 }
2185
2186 return NULL;
2187}
2188
2189
2190static PyTypeObject ElementIter_Type = {
2191 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002192 /* Using the module's name since the pure-Python implementation does not
2193 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002194 "_elementtree._element_iterator", /* tp_name */
2195 sizeof(ElementIterObject), /* tp_basicsize */
2196 0, /* tp_itemsize */
2197 /* methods */
2198 (destructor)elementiter_dealloc, /* tp_dealloc */
2199 0, /* tp_print */
2200 0, /* tp_getattr */
2201 0, /* tp_setattr */
2202 0, /* tp_reserved */
2203 0, /* tp_repr */
2204 0, /* tp_as_number */
2205 0, /* tp_as_sequence */
2206 0, /* tp_as_mapping */
2207 0, /* tp_hash */
2208 0, /* tp_call */
2209 0, /* tp_str */
2210 0, /* tp_getattro */
2211 0, /* tp_setattro */
2212 0, /* tp_as_buffer */
2213 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2214 0, /* tp_doc */
2215 (traverseproc)elementiter_traverse, /* tp_traverse */
2216 0, /* tp_clear */
2217 0, /* tp_richcompare */
2218 0, /* tp_weaklistoffset */
2219 PyObject_SelfIter, /* tp_iter */
2220 (iternextfunc)elementiter_next, /* tp_iternext */
2221 0, /* tp_methods */
2222 0, /* tp_members */
2223 0, /* tp_getset */
2224 0, /* tp_base */
2225 0, /* tp_dict */
2226 0, /* tp_descr_get */
2227 0, /* tp_descr_set */
2228 0, /* tp_dictoffset */
2229 0, /* tp_init */
2230 0, /* tp_alloc */
2231 0, /* tp_new */
2232};
2233
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002234#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002235
2236static PyObject *
2237create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2238{
2239 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002240
2241 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2242 if (!it)
2243 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002244
Victor Stinner4d463432013-07-11 23:05:03 +02002245 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002246 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002247 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002248 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002249 it->root_element = self;
2250
Eli Bendersky64d11e62012-06-15 07:42:50 +03002251 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002252
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002253 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002254 if (it->parent_stack == NULL) {
2255 Py_DECREF(it);
2256 PyErr_NoMemory();
2257 return NULL;
2258 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002259 it->parent_stack_used = 0;
2260 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002261
Eli Bendersky64d11e62012-06-15 07:42:50 +03002262 return (PyObject *)it;
2263}
2264
2265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002266/* ==================================================================== */
2267/* the tree builder type */
2268
2269typedef struct {
2270 PyObject_HEAD
2271
Eli Bendersky58d548d2012-05-29 15:45:16 +03002272 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002273
Antoine Pitrouee329312012-10-04 19:53:29 +02002274 PyObject *this; /* current node */
2275 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002276
Eli Bendersky58d548d2012-05-29 15:45:16 +03002277 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278
Eli Bendersky58d548d2012-05-29 15:45:16 +03002279 PyObject *stack; /* element stack */
2280 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002281
Eli Bendersky48d358b2012-05-30 17:57:50 +03002282 PyObject *element_factory;
2283
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002284 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002285 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002286 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2287 PyObject *end_event_obj;
2288 PyObject *start_ns_event_obj;
2289 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290} TreeBuilderObject;
2291
Christian Heimes90aa7642007-12-19 02:45:37 +00002292#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002293
2294/* -------------------------------------------------------------------- */
2295/* constructor and destructor */
2296
Eli Bendersky58d548d2012-05-29 15:45:16 +03002297static PyObject *
2298treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002299{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002300 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2301 if (t != NULL) {
2302 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002303
Eli Bendersky58d548d2012-05-29 15:45:16 +03002304 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002305 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002307 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002308
Eli Bendersky58d548d2012-05-29 15:45:16 +03002309 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002310 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002311 t->stack = PyList_New(20);
2312 if (!t->stack) {
2313 Py_DECREF(t->this);
2314 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002315 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 return NULL;
2317 }
2318 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002319
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002320 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002321 t->start_event_obj = t->end_event_obj = NULL;
2322 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2323 }
2324 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325}
2326
Serhiy Storchakacb985562015-05-04 15:32:48 +03002327/*[clinic input]
2328_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002329
Serhiy Storchakacb985562015-05-04 15:32:48 +03002330 element_factory: object = NULL
2331
2332[clinic start generated code]*/
2333
2334static int
2335_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2336 PyObject *element_factory)
2337/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2338{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002339 if (element_factory) {
2340 Py_INCREF(element_factory);
Serhiy Storchakaec397562016-04-06 09:50:03 +03002341 Py_XSETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002342 }
2343
Eli Bendersky58d548d2012-05-29 15:45:16 +03002344 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345}
2346
Eli Bendersky48d358b2012-05-30 17:57:50 +03002347static int
2348treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2349{
2350 Py_VISIT(self->root);
2351 Py_VISIT(self->this);
2352 Py_VISIT(self->last);
2353 Py_VISIT(self->data);
2354 Py_VISIT(self->stack);
2355 Py_VISIT(self->element_factory);
2356 return 0;
2357}
2358
2359static int
2360treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002361{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002362 Py_CLEAR(self->end_ns_event_obj);
2363 Py_CLEAR(self->start_ns_event_obj);
2364 Py_CLEAR(self->end_event_obj);
2365 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002366 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002367 Py_CLEAR(self->stack);
2368 Py_CLEAR(self->data);
2369 Py_CLEAR(self->last);
2370 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002371 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002372 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002373 return 0;
2374}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002375
Eli Bendersky48d358b2012-05-30 17:57:50 +03002376static void
2377treebuilder_dealloc(TreeBuilderObject *self)
2378{
2379 PyObject_GC_UnTrack(self);
2380 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002381 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002382}
2383
2384/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002385/* helpers for handling of arbitrary element-like objects */
2386
2387static int
2388treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2389 PyObject **dest, _Py_Identifier *name)
2390{
2391 if (Element_CheckExact(element)) {
2392 Py_DECREF(JOIN_OBJ(*dest));
2393 *dest = JOIN_SET(data, PyList_CheckExact(data));
2394 return 0;
2395 }
2396 else {
2397 PyObject *joined = list_join(data);
2398 int r;
2399 if (joined == NULL)
2400 return -1;
2401 r = _PyObject_SetAttrId(element, name, joined);
2402 Py_DECREF(joined);
2403 return r;
2404 }
2405}
2406
2407/* These two functions steal a reference to data */
2408static int
2409treebuilder_set_element_text(PyObject *element, PyObject *data)
2410{
2411 _Py_IDENTIFIER(text);
2412 return treebuilder_set_element_text_or_tail(
2413 element, data, &((ElementObject *) element)->text, &PyId_text);
2414}
2415
2416static int
2417treebuilder_set_element_tail(PyObject *element, PyObject *data)
2418{
2419 _Py_IDENTIFIER(tail);
2420 return treebuilder_set_element_text_or_tail(
2421 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2422}
2423
2424static int
2425treebuilder_add_subelement(PyObject *element, PyObject *child)
2426{
2427 _Py_IDENTIFIER(append);
2428 if (Element_CheckExact(element)) {
2429 ElementObject *elem = (ElementObject *) element;
2430 return element_add_subelement(elem, child);
2431 }
2432 else {
2433 PyObject *res;
2434 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2435 if (res == NULL)
2436 return -1;
2437 Py_DECREF(res);
2438 return 0;
2439 }
2440}
2441
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002442LOCAL(int)
2443treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2444 PyObject *node)
2445{
2446 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002447 PyObject *res;
2448 PyObject *event = PyTuple_Pack(2, action, node);
2449 if (event == NULL)
2450 return -1;
2451 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2452 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002453 if (res == NULL)
2454 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002455 Py_DECREF(res);
2456 }
2457 return 0;
2458}
2459
Antoine Pitrouee329312012-10-04 19:53:29 +02002460/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461/* handlers */
2462
2463LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002464treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2465 PyObject* attrib)
2466{
2467 PyObject* node;
2468 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002469 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470
2471 if (self->data) {
2472 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002473 if (treebuilder_set_element_text(self->last, self->data))
2474 return NULL;
2475 }
2476 else {
2477 if (treebuilder_set_element_tail(self->last, self->data))
2478 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002479 }
2480 self->data = NULL;
2481 }
2482
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002483 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002484 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002485 } else if (attrib == Py_None) {
2486 attrib = PyDict_New();
2487 if (!attrib)
2488 return NULL;
2489 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2490 Py_DECREF(attrib);
2491 }
2492 else {
2493 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002494 }
2495 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002497 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498
Antoine Pitrouee329312012-10-04 19:53:29 +02002499 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500
2501 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002502 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002503 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504 } else {
2505 if (self->root) {
2506 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002507 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508 "multiple elements on top level"
2509 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002510 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511 }
2512 Py_INCREF(node);
2513 self->root = node;
2514 }
2515
2516 if (self->index < PyList_GET_SIZE(self->stack)) {
2517 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002518 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 Py_INCREF(this);
2520 } else {
2521 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002522 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 }
2524 self->index++;
2525
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002527 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002529 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002531 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2532 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533
2534 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002535
2536 error:
2537 Py_DECREF(node);
2538 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539}
2540
2541LOCAL(PyObject*)
2542treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2543{
2544 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002545 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002546 /* ignore calls to data before the first call to start */
2547 Py_RETURN_NONE;
2548 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002549 /* store the first item as is */
2550 Py_INCREF(data); self->data = data;
2551 } else {
2552 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002553 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2554 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002555 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556 /* expat often generates single character data sections; handle
2557 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002558 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2559 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002561 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 } else if (PyList_CheckExact(self->data)) {
2563 if (PyList_Append(self->data, data) < 0)
2564 return NULL;
2565 } else {
2566 PyObject* list = PyList_New(2);
2567 if (!list)
2568 return NULL;
2569 PyList_SET_ITEM(list, 0, self->data);
2570 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2571 self->data = list;
2572 }
2573 }
2574
2575 Py_RETURN_NONE;
2576}
2577
2578LOCAL(PyObject*)
2579treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2580{
2581 PyObject* item;
2582
2583 if (self->data) {
2584 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002585 if (treebuilder_set_element_text(self->last, self->data))
2586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002587 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002588 if (treebuilder_set_element_tail(self->last, self->data))
2589 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590 }
2591 self->data = NULL;
2592 }
2593
2594 if (self->index == 0) {
2595 PyErr_SetString(
2596 PyExc_IndexError,
2597 "pop from empty stack"
2598 );
2599 return NULL;
2600 }
2601
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002602 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002603 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002604 self->index--;
2605 self->this = PyList_GET_ITEM(self->stack, self->index);
2606 Py_INCREF(self->this);
2607 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002608
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002609 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002611
2612 Py_INCREF(self->last);
2613 return (PyObject*) self->last;
2614}
2615
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616/* -------------------------------------------------------------------- */
2617/* methods (in alphabetical order) */
2618
Serhiy Storchakacb985562015-05-04 15:32:48 +03002619/*[clinic input]
2620_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621
Serhiy Storchakacb985562015-05-04 15:32:48 +03002622 data: object
2623 /
2624
2625[clinic start generated code]*/
2626
2627static PyObject *
2628_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2629/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2630{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631 return treebuilder_handle_data(self, data);
2632}
2633
Serhiy Storchakacb985562015-05-04 15:32:48 +03002634/*[clinic input]
2635_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002636
Serhiy Storchakacb985562015-05-04 15:32:48 +03002637 tag: object
2638 /
2639
2640[clinic start generated code]*/
2641
2642static PyObject *
2643_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2644/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2645{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646 return treebuilder_handle_end(self, tag);
2647}
2648
2649LOCAL(PyObject*)
2650treebuilder_done(TreeBuilderObject* self)
2651{
2652 PyObject* res;
2653
2654 /* FIXME: check stack size? */
2655
2656 if (self->root)
2657 res = self->root;
2658 else
2659 res = Py_None;
2660
2661 Py_INCREF(res);
2662 return res;
2663}
2664
Serhiy Storchakacb985562015-05-04 15:32:48 +03002665/*[clinic input]
2666_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002667
Serhiy Storchakacb985562015-05-04 15:32:48 +03002668[clinic start generated code]*/
2669
2670static PyObject *
2671_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2672/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2673{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002674 return treebuilder_done(self);
2675}
2676
Serhiy Storchakacb985562015-05-04 15:32:48 +03002677/*[clinic input]
2678_elementtree.TreeBuilder.start
2679
2680 tag: object
2681 attrs: object = None
2682 /
2683
2684[clinic start generated code]*/
2685
2686static PyObject *
2687_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2688 PyObject *attrs)
2689/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002691 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692}
2693
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694/* ==================================================================== */
2695/* the expat interface */
2696
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002697#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002699
2700/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2701 * cached globally without being in per-module state.
2702 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002703static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002705
Eli Bendersky52467b12012-06-01 07:13:08 +03002706static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2707 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2708
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709typedef struct {
2710 PyObject_HEAD
2711
2712 XML_Parser parser;
2713
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002714 PyObject *target;
2715 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002716
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002717 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002719 PyObject *handle_start;
2720 PyObject *handle_data;
2721 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002723 PyObject *handle_comment;
2724 PyObject *handle_pi;
2725 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002727 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002728
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729} XMLParserObject;
2730
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002731static PyObject*
2732_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2733static PyObject *
2734_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2735 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002736
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737/* helpers */
2738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739LOCAL(PyObject*)
2740makeuniversal(XMLParserObject* self, const char* string)
2741{
2742 /* convert a UTF-8 tag/attribute name from the expat parser
2743 to a universal name string */
2744
Antoine Pitrouc1948842012-10-01 23:40:37 +02002745 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 PyObject* key;
2747 PyObject* value;
2748
2749 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002750 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751 if (!key)
2752 return NULL;
2753
2754 value = PyDict_GetItem(self->names, key);
2755
2756 if (value) {
2757 Py_INCREF(value);
2758 } else {
2759 /* new name. convert to universal name, and decode as
2760 necessary */
2761
2762 PyObject* tag;
2763 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002764 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765
2766 /* look for namespace separator */
2767 for (i = 0; i < size; i++)
2768 if (string[i] == '}')
2769 break;
2770 if (i != size) {
2771 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002772 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002773 if (tag == NULL) {
2774 Py_DECREF(key);
2775 return NULL;
2776 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002777 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778 p[0] = '{';
2779 memcpy(p+1, string, size);
2780 size++;
2781 } else {
2782 /* plain name; use key as tag */
2783 Py_INCREF(key);
2784 tag = key;
2785 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002786
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002789 value = PyUnicode_DecodeUTF8(p, size, "strict");
2790 Py_DECREF(tag);
2791 if (!value) {
2792 Py_DECREF(key);
2793 return NULL;
2794 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795
2796 /* add to names dictionary */
2797 if (PyDict_SetItem(self->names, key, value) < 0) {
2798 Py_DECREF(key);
2799 Py_DECREF(value);
2800 return NULL;
2801 }
2802 }
2803
2804 Py_DECREF(key);
2805 return value;
2806}
2807
Eli Bendersky5b77d812012-03-16 08:20:05 +02002808/* Set the ParseError exception with the given parameters.
2809 * If message is not NULL, it's used as the error string. Otherwise, the
2810 * message string is the default for the given error_code.
2811*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002812static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002813expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2814 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002815{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002816 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002817 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002818
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002819 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002820 message ? message : EXPAT(ErrorString)(error_code),
2821 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002822 if (errmsg == NULL)
2823 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824
Eli Bendersky532d03e2013-08-10 08:00:39 -07002825 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002826 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002827 if (!error)
2828 return;
2829
Eli Bendersky5b77d812012-03-16 08:20:05 +02002830 /* Add code and position attributes */
2831 code = PyLong_FromLong((long)error_code);
2832 if (!code) {
2833 Py_DECREF(error);
2834 return;
2835 }
2836 if (PyObject_SetAttrString(error, "code", code) == -1) {
2837 Py_DECREF(error);
2838 Py_DECREF(code);
2839 return;
2840 }
2841 Py_DECREF(code);
2842
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002843 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002844 if (!position) {
2845 Py_DECREF(error);
2846 return;
2847 }
2848 if (PyObject_SetAttrString(error, "position", position) == -1) {
2849 Py_DECREF(error);
2850 Py_DECREF(position);
2851 return;
2852 }
2853 Py_DECREF(position);
2854
Eli Bendersky532d03e2013-08-10 08:00:39 -07002855 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002856 Py_DECREF(error);
2857}
2858
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002859/* -------------------------------------------------------------------- */
2860/* handlers */
2861
2862static void
2863expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2864 int data_len)
2865{
2866 PyObject* key;
2867 PyObject* value;
2868 PyObject* res;
2869
2870 if (data_len < 2 || data_in[0] != '&')
2871 return;
2872
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002873 if (PyErr_Occurred())
2874 return;
2875
Neal Norwitz0269b912007-08-08 06:56:02 +00002876 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877 if (!key)
2878 return;
2879
2880 value = PyDict_GetItem(self->entity, key);
2881
2882 if (value) {
2883 if (TreeBuilder_CheckExact(self->target))
2884 res = treebuilder_handle_data(
2885 (TreeBuilderObject*) self->target, value
2886 );
2887 else if (self->handle_data)
2888 res = PyObject_CallFunction(self->handle_data, "O", value);
2889 else
2890 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002891 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002892 } else if (!PyErr_Occurred()) {
2893 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002894 char message[128] = "undefined entity ";
2895 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002896 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002897 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002899 EXPAT(GetErrorColumnNumber)(self->parser),
2900 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901 );
2902 }
2903
2904 Py_DECREF(key);
2905}
2906
2907static void
2908expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2909 const XML_Char **attrib_in)
2910{
2911 PyObject* res;
2912 PyObject* tag;
2913 PyObject* attrib;
2914 int ok;
2915
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002916 if (PyErr_Occurred())
2917 return;
2918
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919 /* tag name */
2920 tag = makeuniversal(self, tag_in);
2921 if (!tag)
2922 return; /* parser will look for errors */
2923
2924 /* attributes */
2925 if (attrib_in[0]) {
2926 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002927 if (!attrib) {
2928 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002930 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 while (attrib_in[0] && attrib_in[1]) {
2932 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002933 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934 if (!key || !value) {
2935 Py_XDECREF(value);
2936 Py_XDECREF(key);
2937 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002938 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 return;
2940 }
2941 ok = PyDict_SetItem(attrib, key, value);
2942 Py_DECREF(value);
2943 Py_DECREF(key);
2944 if (ok < 0) {
2945 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002946 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947 return;
2948 }
2949 attrib_in += 2;
2950 }
2951 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002952 Py_INCREF(Py_None);
2953 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002954 }
2955
2956 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002957 /* shortcut */
2958 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2959 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002960 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002961 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002962 if (attrib == Py_None) {
2963 Py_DECREF(attrib);
2964 attrib = PyDict_New();
2965 if (!attrib) {
2966 Py_DECREF(tag);
2967 return;
2968 }
2969 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002970 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002971 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 res = NULL;
2973
2974 Py_DECREF(tag);
2975 Py_DECREF(attrib);
2976
2977 Py_XDECREF(res);
2978}
2979
2980static void
2981expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2982 int data_len)
2983{
2984 PyObject* data;
2985 PyObject* res;
2986
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002987 if (PyErr_Occurred())
2988 return;
2989
Neal Norwitz0269b912007-08-08 06:56:02 +00002990 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002991 if (!data)
2992 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993
2994 if (TreeBuilder_CheckExact(self->target))
2995 /* shortcut */
2996 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2997 else if (self->handle_data)
2998 res = PyObject_CallFunction(self->handle_data, "O", data);
2999 else
3000 res = NULL;
3001
3002 Py_DECREF(data);
3003
3004 Py_XDECREF(res);
3005}
3006
3007static void
3008expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3009{
3010 PyObject* tag;
3011 PyObject* res = NULL;
3012
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003013 if (PyErr_Occurred())
3014 return;
3015
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 if (TreeBuilder_CheckExact(self->target))
3017 /* shortcut */
3018 /* the standard tree builder doesn't look at the end tag */
3019 res = treebuilder_handle_end(
3020 (TreeBuilderObject*) self->target, Py_None
3021 );
3022 else if (self->handle_end) {
3023 tag = makeuniversal(self, tag_in);
3024 if (tag) {
3025 res = PyObject_CallFunction(self->handle_end, "O", tag);
3026 Py_DECREF(tag);
3027 }
3028 }
3029
3030 Py_XDECREF(res);
3031}
3032
3033static void
3034expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3035 const XML_Char *uri)
3036{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003037 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3038 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003039
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003040 if (PyErr_Occurred())
3041 return;
3042
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003043 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003044 return;
3045
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003046 if (!uri)
3047 uri = "";
3048 if (!prefix)
3049 prefix = "";
3050
3051 parcel = Py_BuildValue("ss", prefix, uri);
3052 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003053 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003054 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3055 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003056}
3057
3058static void
3059expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3060{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003061 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3062
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003063 if (PyErr_Occurred())
3064 return;
3065
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003066 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003067 return;
3068
3069 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003070}
3071
3072static void
3073expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3074{
3075 PyObject* comment;
3076 PyObject* res;
3077
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003078 if (PyErr_Occurred())
3079 return;
3080
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003081 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003082 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083 if (comment) {
3084 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3085 Py_XDECREF(res);
3086 Py_DECREF(comment);
3087 }
3088 }
3089}
3090
Eli Bendersky45839902013-01-13 05:14:47 -08003091static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003092expat_start_doctype_handler(XMLParserObject *self,
3093 const XML_Char *doctype_name,
3094 const XML_Char *sysid,
3095 const XML_Char *pubid,
3096 int has_internal_subset)
3097{
3098 PyObject *self_pyobj = (PyObject *)self;
3099 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3100 PyObject *parser_doctype = NULL;
3101 PyObject *res = NULL;
3102
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003103 if (PyErr_Occurred())
3104 return;
3105
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003106 doctype_name_obj = makeuniversal(self, doctype_name);
3107 if (!doctype_name_obj)
3108 return;
3109
3110 if (sysid) {
3111 sysid_obj = makeuniversal(self, sysid);
3112 if (!sysid_obj) {
3113 Py_DECREF(doctype_name_obj);
3114 return;
3115 }
3116 } else {
3117 Py_INCREF(Py_None);
3118 sysid_obj = Py_None;
3119 }
3120
3121 if (pubid) {
3122 pubid_obj = makeuniversal(self, pubid);
3123 if (!pubid_obj) {
3124 Py_DECREF(doctype_name_obj);
3125 Py_DECREF(sysid_obj);
3126 return;
3127 }
3128 } else {
3129 Py_INCREF(Py_None);
3130 pubid_obj = Py_None;
3131 }
3132
3133 /* If the target has a handler for doctype, call it. */
3134 if (self->handle_doctype) {
3135 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3136 doctype_name_obj, pubid_obj, sysid_obj);
3137 Py_CLEAR(res);
3138 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003139 else {
3140 /* Now see if the parser itself has a doctype method. If yes and it's
3141 * a custom method, call it but warn about deprecation. If it's only
3142 * the vanilla XMLParser method, do nothing.
3143 */
3144 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3145 if (parser_doctype &&
3146 !(PyCFunction_Check(parser_doctype) &&
3147 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3148 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003149 (PyCFunction) _elementtree_XMLParser_doctype)) {
3150 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3151 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003152 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003153 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003154 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003155 res = PyObject_CallFunction(parser_doctype, "OOO",
3156 doctype_name_obj, pubid_obj, sysid_obj);
3157 Py_CLEAR(res);
3158 }
3159 }
3160
3161clear:
3162 Py_XDECREF(parser_doctype);
3163 Py_DECREF(doctype_name_obj);
3164 Py_DECREF(pubid_obj);
3165 Py_DECREF(sysid_obj);
3166}
3167
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003168static void
3169expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3170 const XML_Char* data_in)
3171{
3172 PyObject* target;
3173 PyObject* data;
3174 PyObject* res;
3175
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003176 if (PyErr_Occurred())
3177 return;
3178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003179 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003180 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3181 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003182 if (target && data) {
3183 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3184 Py_XDECREF(res);
3185 Py_DECREF(data);
3186 Py_DECREF(target);
3187 } else {
3188 Py_XDECREF(data);
3189 Py_XDECREF(target);
3190 }
3191 }
3192}
3193
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003194/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003195
Eli Bendersky52467b12012-06-01 07:13:08 +03003196static PyObject *
3197xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198{
Eli Bendersky52467b12012-06-01 07:13:08 +03003199 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3200 if (self) {
3201 self->parser = NULL;
3202 self->target = self->entity = self->names = NULL;
3203 self->handle_start = self->handle_data = self->handle_end = NULL;
3204 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003205 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003206 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003207 return (PyObject *)self;
3208}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003209
Serhiy Storchakacb985562015-05-04 15:32:48 +03003210/*[clinic input]
3211_elementtree.XMLParser.__init__
3212
3213 html: object = NULL
3214 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003215 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003216
3217[clinic start generated code]*/
3218
Eli Bendersky52467b12012-06-01 07:13:08 +03003219static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003220_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3221 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003222/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003223{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003224 self->entity = PyDict_New();
3225 if (!self->entity)
3226 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003227
Serhiy Storchakacb985562015-05-04 15:32:48 +03003228 self->names = PyDict_New();
3229 if (!self->names) {
3230 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003231 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003233
Serhiy Storchakacb985562015-05-04 15:32:48 +03003234 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3235 if (!self->parser) {
3236 Py_CLEAR(self->entity);
3237 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003239 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240 }
3241
Eli Bendersky52467b12012-06-01 07:13:08 +03003242 if (target) {
3243 Py_INCREF(target);
3244 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003245 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003247 Py_CLEAR(self->entity);
3248 Py_CLEAR(self->names);
3249 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003250 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003251 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003253 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254
Serhiy Storchakacb985562015-05-04 15:32:48 +03003255 self->handle_start = PyObject_GetAttrString(target, "start");
3256 self->handle_data = PyObject_GetAttrString(target, "data");
3257 self->handle_end = PyObject_GetAttrString(target, "end");
3258 self->handle_comment = PyObject_GetAttrString(target, "comment");
3259 self->handle_pi = PyObject_GetAttrString(target, "pi");
3260 self->handle_close = PyObject_GetAttrString(target, "close");
3261 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262
3263 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003264
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003268 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 (XML_StartElementHandler) expat_start_handler,
3270 (XML_EndElementHandler) expat_end_handler
3271 );
3272 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003273 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 (XML_DefaultHandler) expat_default_handler
3275 );
3276 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003277 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278 (XML_CharacterDataHandler) expat_data_handler
3279 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003280 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003282 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283 (XML_CommentHandler) expat_comment_handler
3284 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003285 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003287 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 (XML_ProcessingInstructionHandler) expat_pi_handler
3289 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003290 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003292 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3293 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003295 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003296 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298
Eli Bendersky52467b12012-06-01 07:13:08 +03003299 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300}
3301
Eli Bendersky52467b12012-06-01 07:13:08 +03003302static int
3303xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3304{
3305 Py_VISIT(self->handle_close);
3306 Py_VISIT(self->handle_pi);
3307 Py_VISIT(self->handle_comment);
3308 Py_VISIT(self->handle_end);
3309 Py_VISIT(self->handle_data);
3310 Py_VISIT(self->handle_start);
3311
3312 Py_VISIT(self->target);
3313 Py_VISIT(self->entity);
3314 Py_VISIT(self->names);
3315
3316 return 0;
3317}
3318
3319static int
3320xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321{
3322 EXPAT(ParserFree)(self->parser);
3323
Antoine Pitrouc1948842012-10-01 23:40:37 +02003324 Py_CLEAR(self->handle_close);
3325 Py_CLEAR(self->handle_pi);
3326 Py_CLEAR(self->handle_comment);
3327 Py_CLEAR(self->handle_end);
3328 Py_CLEAR(self->handle_data);
3329 Py_CLEAR(self->handle_start);
3330 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331
Antoine Pitrouc1948842012-10-01 23:40:37 +02003332 Py_CLEAR(self->target);
3333 Py_CLEAR(self->entity);
3334 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335
Eli Bendersky52467b12012-06-01 07:13:08 +03003336 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337}
3338
Eli Bendersky52467b12012-06-01 07:13:08 +03003339static void
3340xmlparser_dealloc(XMLParserObject* self)
3341{
3342 PyObject_GC_UnTrack(self);
3343 xmlparser_gc_clear(self);
3344 Py_TYPE(self)->tp_free((PyObject *)self);
3345}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003346
3347LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003348expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003349{
3350 int ok;
3351
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003352 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3354
3355 if (PyErr_Occurred())
3356 return NULL;
3357
3358 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003359 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003360 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003361 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003362 EXPAT(GetErrorColumnNumber)(self->parser),
3363 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364 );
3365 return NULL;
3366 }
3367
3368 Py_RETURN_NONE;
3369}
3370
Serhiy Storchakacb985562015-05-04 15:32:48 +03003371/*[clinic input]
3372_elementtree.XMLParser.close
3373
3374[clinic start generated code]*/
3375
3376static PyObject *
3377_elementtree_XMLParser_close_impl(XMLParserObject *self)
3378/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379{
3380 /* end feeding data to parser */
3381
3382 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003383 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003384 if (!res)
3385 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003387 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388 Py_DECREF(res);
3389 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003390 }
3391 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003392 Py_DECREF(res);
3393 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003394 }
3395 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003396 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003397 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003398}
3399
Serhiy Storchakacb985562015-05-04 15:32:48 +03003400/*[clinic input]
3401_elementtree.XMLParser.feed
3402
3403 data: object
3404 /
3405
3406[clinic start generated code]*/
3407
3408static PyObject *
3409_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3410/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411{
3412 /* feed data to parser */
3413
Serhiy Storchakacb985562015-05-04 15:32:48 +03003414 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003415 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003416 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3417 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003418 return NULL;
3419 if (data_len > INT_MAX) {
3420 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3421 return NULL;
3422 }
3423 /* Explicitly set UTF-8 encoding. Return code ignored. */
3424 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003425 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003426 }
3427 else {
3428 Py_buffer view;
3429 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003430 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003431 return NULL;
3432 if (view.len > INT_MAX) {
3433 PyBuffer_Release(&view);
3434 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3435 return NULL;
3436 }
3437 res = expat_parse(self, view.buf, (int)view.len, 0);
3438 PyBuffer_Release(&view);
3439 return res;
3440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003441}
3442
Serhiy Storchakacb985562015-05-04 15:32:48 +03003443/*[clinic input]
3444_elementtree.XMLParser._parse_whole
3445
3446 file: object
3447 /
3448
3449[clinic start generated code]*/
3450
3451static PyObject *
3452_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3453/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454{
Eli Benderskya3699232013-05-19 18:47:23 -07003455 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456 PyObject* reader;
3457 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003458 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003459 PyObject* res;
3460
Serhiy Storchakacb985562015-05-04 15:32:48 +03003461 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003462 if (!reader)
3463 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003464
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465 /* read from open file object */
3466 for (;;) {
3467
3468 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3469
3470 if (!buffer) {
3471 /* read failed (e.g. due to KeyboardInterrupt) */
3472 Py_DECREF(reader);
3473 return NULL;
3474 }
3475
Eli Benderskyf996e772012-03-16 05:53:30 +02003476 if (PyUnicode_CheckExact(buffer)) {
3477 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003478 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003479 Py_DECREF(buffer);
3480 break;
3481 }
3482 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003483 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003484 if (!temp) {
3485 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003486 Py_DECREF(reader);
3487 return NULL;
3488 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003489 buffer = temp;
3490 }
3491 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003492 Py_DECREF(buffer);
3493 break;
3494 }
3495
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003496 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3497 Py_DECREF(buffer);
3498 Py_DECREF(reader);
3499 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3500 return NULL;
3501 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003502 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003503 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003504 );
3505
3506 Py_DECREF(buffer);
3507
3508 if (!res) {
3509 Py_DECREF(reader);
3510 return NULL;
3511 }
3512 Py_DECREF(res);
3513
3514 }
3515
3516 Py_DECREF(reader);
3517
3518 res = expat_parse(self, "", 0, 1);
3519
3520 if (res && TreeBuilder_CheckExact(self->target)) {
3521 Py_DECREF(res);
3522 return treebuilder_done((TreeBuilderObject*) self->target);
3523 }
3524
3525 return res;
3526}
3527
Serhiy Storchakacb985562015-05-04 15:32:48 +03003528/*[clinic input]
3529_elementtree.XMLParser.doctype
3530
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003531 name: object
3532 pubid: object
3533 system: object
3534 /
3535
Serhiy Storchakacb985562015-05-04 15:32:48 +03003536[clinic start generated code]*/
3537
3538static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003539_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3540 PyObject *pubid, PyObject *system)
3541/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003542{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003543 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3544 "This method of XMLParser is deprecated. Define"
3545 " doctype() method on the TreeBuilder target.",
3546 1) < 0) {
3547 return NULL;
3548 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003549 Py_RETURN_NONE;
3550}
3551
Serhiy Storchakacb985562015-05-04 15:32:48 +03003552/*[clinic input]
3553_elementtree.XMLParser._setevents
3554
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003555 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003556 events_to_report: object = None
3557 /
3558
3559[clinic start generated code]*/
3560
3561static PyObject *
3562_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3563 PyObject *events_queue,
3564 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003565/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003566{
3567 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003568 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003569 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003570 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003571
3572 if (!TreeBuilder_CheckExact(self->target)) {
3573 PyErr_SetString(
3574 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003575 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003576 "targets"
3577 );
3578 return NULL;
3579 }
3580
3581 target = (TreeBuilderObject*) self->target;
3582
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003583 events_append = PyObject_GetAttrString(events_queue, "append");
3584 if (events_append == NULL)
3585 return NULL;
Serhiy Storchakaec397562016-04-06 09:50:03 +03003586 Py_XSETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587
3588 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003589 Py_CLEAR(target->start_event_obj);
3590 Py_CLEAR(target->end_event_obj);
3591 Py_CLEAR(target->start_ns_event_obj);
3592 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003594 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003596 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003597 Py_RETURN_NONE;
3598 }
3599
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003600 if (!(events_seq = PySequence_Fast(events_to_report,
3601 "events must be a sequence"))) {
3602 return NULL;
3603 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003605 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003606 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3607 char *event_name = NULL;
3608 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003609 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003610 } else if (PyBytes_Check(event_name_obj)) {
3611 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003612 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003613 if (event_name == NULL) {
3614 Py_DECREF(events_seq);
3615 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3616 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003617 }
3618
3619 Py_INCREF(event_name_obj);
3620 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003621 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003622 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003623 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003624 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003625 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626 EXPAT(SetNamespaceDeclHandler)(
3627 self->parser,
3628 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3629 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3630 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003631 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003632 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 EXPAT(SetNamespaceDeclHandler)(
3634 self->parser,
3635 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3636 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3637 );
3638 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003639 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003640 Py_DECREF(events_seq);
3641 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003642 return NULL;
3643 }
3644 }
3645
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003646 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003647 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648}
3649
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003650static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003651xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003652{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003653 if (PyUnicode_Check(nameobj)) {
3654 PyObject* res;
3655 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3656 res = self->entity;
3657 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3658 res = self->target;
3659 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3660 return PyUnicode_FromFormat(
3661 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003663 }
3664 else
3665 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003666
Alexander Belopolskye239d232010-12-08 23:31:48 +00003667 Py_INCREF(res);
3668 return res;
3669 }
3670 generic:
3671 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003672}
3673
Serhiy Storchakacb985562015-05-04 15:32:48 +03003674#include "clinic/_elementtree.c.h"
3675
/* Method table for Element objects; installed as tp_methods of Element_Type.
   The _ELEMENTTREE_ELEMENT_*_METHODDEF names are macros, presumably supplied
   by the clinic/_elementtree.c.h header included just above -- each one is
   expected to expand to a complete PyMethodDef entry including its trailing
   comma (TODO confirm against the generated header).  "getiterator" is
   registered by hand and points at the same C function and docstring as
   _elementtree_Element_iter.  The table ends with a {NULL, NULL} sentinel. */
3676 static PyMethodDef element_methods[] = {
3677
3678 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3679
3680 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3681 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3682
3683 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3684 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3685 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3686
3687 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3688 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3689 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3690 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3691
3692 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3693 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3694 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3695
3696 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3697 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3698
3699 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3700 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3701
3702 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3703
3704 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3705 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3706 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3707 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3708 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3709
3710 {NULL, NULL}
3711 };
3712
/* Mapping protocol slots for Element, referenced by Element_Type's
   tp_as_mapping.  Entries are positional, in PyMappingMethods order:
   length, subscript read, subscript assignment. */
3713 static PyMappingMethods element_as_mapping = {
3714 (lenfunc) element_length,
3715 (binaryfunc) element_subscr,
3716 (objobjargproc) element_ass_subscr,
3717 };
3718
/* Attribute descriptors for Element ("tag", "text", "tail", "attrib");
   installed as tp_getset of Element_Type.  Each entry is
   {name, getter, setter, docstring}; the list ends with a {NULL} sentinel. */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003719static PyGetSetDef element_getsetlist[] = {
3720 {"tag",
3721 (getter)element_tag_getter,
3722 (setter)element_tag_setter,
3723 "A string identifying what kind of data this element represents"},
3724 {"text",
3725 (getter)element_text_getter,
3726 (setter)element_text_setter,
3727 "A string of text directly after the start tag, or None"},
3728 {"tail",
3729 (getter)element_tail_getter,
3730 (setter)element_tail_setter,
3731 "A string of text directly after the end tag, or None"},
3732 {"attrib",
3733 (getter)element_attrib_getter,
3734 (setter)element_attrib_setter,
3735 "A dictionary containing the element's attributes"},
3736 {NULL},
3737 };
3738
/* Type object for xml.etree.ElementTree.Element.
   The initializer is positional: every value must stay in the slot order
   named by the trailing comments.  The flags make the type subclassable
   (Py_TPFLAGS_BASETYPE) and GC-aware (Py_TPFLAGS_HAVE_GC, paired with the
   tp_traverse/tp_clear entries); tp_weaklistoffset points at the
   weakreflist member of ElementObject.  PyInit__elementtree() passes this
   object to PyType_Ready() before adding it to the module. */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003739static PyTypeObject Element_Type = {
3740 PyVarObject_HEAD_INIT(NULL, 0)
3741 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3742 /* methods */
3743 (destructor)element_dealloc, /* tp_dealloc */
3744 0, /* tp_print */
3745 0, /* tp_getattr */
3746 0, /* tp_setattr */
3747 0, /* tp_reserved */
3748 (reprfunc)element_repr, /* tp_repr */
3749 0, /* tp_as_number */
3750 &element_as_sequence, /* tp_as_sequence */
3751 &element_as_mapping, /* tp_as_mapping */
3752 0, /* tp_hash */
3753 0, /* tp_call */
3754 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003755 PyObject_GenericGetAttr, /* tp_getattro */
3756 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003757 0, /* tp_as_buffer */
3758 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3759 /* tp_flags */
3760 0, /* tp_doc */
3761 (traverseproc)element_gc_traverse, /* tp_traverse */
3762 (inquiry)element_gc_clear, /* tp_clear */
3763 0, /* tp_richcompare */
3764 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3765 0, /* tp_iter */
3766 0, /* tp_iternext */
3767 element_methods, /* tp_methods */
3768 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003769 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003770 0, /* tp_base */
3771 0, /* tp_dict */
3772 0, /* tp_descr_get */
3773 0, /* tp_descr_set */
3774 0, /* tp_dictoffset */
3775 (initproc)element_init, /* tp_init */
3776 PyType_GenericAlloc, /* tp_alloc */
3777 element_new, /* tp_new */
3778 0, /* tp_free */
3779 };
3780
/* Method table for TreeBuilder objects (data, start, end, close); installed
   as tp_methods of TreeBuilder_Type.  The *_METHODDEF names are macros,
   presumably from the generated clinic header (TODO confirm); the table
   ends with a {NULL, NULL} sentinel. */
3781 static PyMethodDef treebuilder_methods[] = {
3782 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3783 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3784 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3785 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3786 {NULL, NULL}
3787 };
3788
/* Type object for xml.etree.ElementTree.TreeBuilder.
   The initializer is positional: every value must stay in the slot order
   named by the trailing comments.  The type is subclassable and GC-aware
   (see tp_flags, tp_traverse, tp_clear); tp_init is the clinic-generated
   _elementtree_TreeBuilder___init__.  PyInit__elementtree() passes this
   object to PyType_Ready() before adding it to the module. */
3789 static PyTypeObject TreeBuilder_Type = {
3790 PyVarObject_HEAD_INIT(NULL, 0)
3791 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3792 /* methods */
3793 (destructor)treebuilder_dealloc, /* tp_dealloc */
3794 0, /* tp_print */
3795 0, /* tp_getattr */
3796 0, /* tp_setattr */
3797 0, /* tp_reserved */
3798 0, /* tp_repr */
3799 0, /* tp_as_number */
3800 0, /* tp_as_sequence */
3801 0, /* tp_as_mapping */
3802 0, /* tp_hash */
3803 0, /* tp_call */
3804 0, /* tp_str */
3805 0, /* tp_getattro */
3806 0, /* tp_setattro */
3807 0, /* tp_as_buffer */
3808 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3809 /* tp_flags */
3810 0, /* tp_doc */
3811 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3812 (inquiry)treebuilder_gc_clear, /* tp_clear */
3813 0, /* tp_richcompare */
3814 0, /* tp_weaklistoffset */
3815 0, /* tp_iter */
3816 0, /* tp_iternext */
3817 treebuilder_methods, /* tp_methods */
3818 0, /* tp_members */
3819 0, /* tp_getset */
3820 0, /* tp_base */
3821 0, /* tp_dict */
3822 0, /* tp_descr_get */
3823 0, /* tp_descr_set */
3824 0, /* tp_dictoffset */
3825 _elementtree_TreeBuilder___init__, /* tp_init */
3826 PyType_GenericAlloc, /* tp_alloc */
3827 treebuilder_new, /* tp_new */
3828 0, /* tp_free */
3829 };
3830
/* Method table for XMLParser objects (feed, close, _parse_whole,
   _setevents, doctype); installed as tp_methods of XMLParser_Type.  The
   *_METHODDEF names are macros, presumably from the generated clinic header
   (TODO confirm); the table ends with a {NULL, NULL} sentinel. */
3831 static PyMethodDef xmlparser_methods[] = {
3832 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3833 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3834 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3835 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3836 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3837 {NULL, NULL}
3838 };
3839
/* Type object for xml.etree.ElementTree.XMLParser.
   The initializer is positional: every value must stay in the slot order
   named by the trailing comments.  Attribute lookup goes through
   xmlparser_getattro (tp_getattro); the type is subclassable and GC-aware,
   with xmlparser_gc_traverse / xmlparser_gc_clear as its GC hooks and
   xmlparser_dealloc as destructor.  PyInit__elementtree() passes this
   object to PyType_Ready() before adding it to the module. */
Neal Norwitz227b5332006-03-22 09:28:35 +00003840static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003841 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003842 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003843 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003844 (destructor)xmlparser_dealloc, /* tp_dealloc */
3845 0, /* tp_print */
3846 0, /* tp_getattr */
3847 0, /* tp_setattr */
3848 0, /* tp_reserved */
3849 0, /* tp_repr */
3850 0, /* tp_as_number */
3851 0, /* tp_as_sequence */
3852 0, /* tp_as_mapping */
3853 0, /* tp_hash */
3854 0, /* tp_call */
3855 0, /* tp_str */
3856 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3857 0, /* tp_setattro */
3858 0, /* tp_as_buffer */
3859 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3860 /* tp_flags */
3861 0, /* tp_doc */
3862 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3863 (inquiry)xmlparser_gc_clear, /* tp_clear */
3864 0, /* tp_richcompare */
3865 0, /* tp_weaklistoffset */
3866 0, /* tp_iter */
3867 0, /* tp_iternext */
3868 xmlparser_methods, /* tp_methods */
3869 0, /* tp_members */
3870 0, /* tp_getset */
3871 0, /* tp_base */
3872 0, /* tp_dict */
3873 0, /* tp_descr_get */
3874 0, /* tp_descr_set */
3875 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003876 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003877 PyType_GenericAlloc, /* tp_alloc */
3878 xmlparser_new, /* tp_new */
3879 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003880};
3881
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003882/* ==================================================================== */
3883/* python module interface */
3884
/* Module-level function table, passed to the module definition below.  It
   exposes a single function, "SubElement", which accepts positional and
   keyword arguments; the table ends with a {NULL, NULL} sentinel. */
3885 static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003886 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003887 {NULL, NULL}
3888 };
3889
Martin v. Löwis1a214512008-06-11 05:26:20 +00003890
/* Module definition for "_elementtree".  The initializer is positional and
   follows the PyModuleDef field order (named in the comments below).  The
   per-module state is an elementtreestate struct, and the last three
   entries are the hooks the interpreter uses to traverse, clear and free
   that state. */
Eli Bendersky532d03e2013-08-10 08:00:39 -07003891static struct PyModuleDef elementtreemodule = {
3892 PyModuleDef_HEAD_INIT,
3893 "_elementtree", /* m_name */
3894 NULL, /* m_doc */
3895 sizeof(elementtreestate), /* m_size */
3896 _functions, /* m_methods */
3897 NULL, /* m_slots (m_reload before Python 3.5) */
3898 elementtree_traverse, /* m_traverse */
3899 elementtree_clear, /* m_clear */
3900 elementtree_free /* m_free */
Martin v. Löwis1a214512008-06-11 05:26:20 +00003901};
3902
Neal Norwitzf6657e62006-12-28 04:47:50 +00003903PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003904PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003905{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003906 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003907 elementtreestate *st;
3908
3909 m = PyState_FindModule(&elementtreemodule);
3910 if (m) {
3911 Py_INCREF(m);
3912 return m;
3913 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003914
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003915 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003916 if (PyType_Ready(&ElementIter_Type) < 0)
3917 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003918 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003919 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003920 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003921 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003922 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003923 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003924
Eli Bendersky532d03e2013-08-10 08:00:39 -07003925 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003926 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003927 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003928 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003929
Eli Bendersky828efde2012-04-05 05:40:58 +03003930 if (!(temp = PyImport_ImportModule("copy")))
3931 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003932 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003933 Py_XDECREF(temp);
3934
Eli Bendersky532d03e2013-08-10 08:00:39 -07003935 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003936 return NULL;
3937
Eli Bendersky20d41742012-06-01 09:48:37 +03003938 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003939 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3940 if (expat_capi) {
3941 /* check that it's usable */
3942 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003943 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003944 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3945 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003946 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003947 PyErr_SetString(PyExc_ImportError,
3948 "pyexpat version is incompatible");
3949 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003950 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003951 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003952 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003953 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003954
Eli Bendersky532d03e2013-08-10 08:00:39 -07003955 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003956 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003957 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003958 Py_INCREF(st->parseerror_obj);
3959 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003960
Eli Bendersky092af1f2012-03-04 07:14:03 +02003961 Py_INCREF((PyObject *)&Element_Type);
3962 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3963
Eli Bendersky58d548d2012-05-29 15:45:16 +03003964 Py_INCREF((PyObject *)&TreeBuilder_Type);
3965 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3966
Eli Bendersky52467b12012-06-01 07:13:08 +03003967 Py_INCREF((PyObject *)&XMLParser_Type);
3968 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003969
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003970 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003971}