blob: 580c53a417832a85ce63ad4938bcbbb86a36e2be [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra block;
   up to this many children are stored without a separate buffer. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change the condition to 1 to keep a running
   byte count in `memory` and print it on every ALLOC/RELEASE; with the
   condition at 0 both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private definition of the
   given type; MSVC builds additionally ask for __inline and __fastcall */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored value carries a 'join' flag.  Every use of text or
   tail as an object pointer must therefore go through JOIN_OBJ.  See the
   comments in the ElementObject definition for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - p's object pointer combined with the given flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True only when op's type is Element_Type itself (subclasses excluded). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
399 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200401 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200402 Py_DECREF(tmp);
403
404 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 Py_DECREF(JOIN_OBJ(tmp));
408
409 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200411 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_DECREF(JOIN_OBJ(tmp));
413
414 return 0;
415}
416
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200420 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000421 PyObject* *children;
422
423 /* make sure self->children can hold the given number of extra
424 elements. set an exception and return -1 if allocation failed */
425
Victor Stinner5f0af232013-07-11 23:01:36 +0200426 if (!self->extra) {
427 if (create_extra(self, NULL) < 0)
428 return -1;
429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200431 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432
433 if (size > self->extra->allocated) {
434 /* use Python 2.4's list growth strategy */
435 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000436 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100437 * which needs at least 4 bytes.
438 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000439 * be safe.
440 */
441 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
443 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100446 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 * false alarm always assume at least one child to be safe.
448 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 children = PyObject_Realloc(self->extra->children,
450 size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 } else {
454 children = PyObject_Malloc(size * sizeof(PyObject*));
455 if (!children)
456 goto nomemory;
457 /* copy existing children from static area to malloc buffer */
458 memcpy(children, self->extra->children,
459 self->extra->length * sizeof(PyObject*));
460 }
461 self->extra->children = children;
462 self->extra->allocated = size;
463 }
464
465 return 0;
466
467 nomemory:
468 PyErr_NoMemory();
469 return -1;
470}
471
472LOCAL(int)
473element_add_subelement(ElementObject* self, PyObject* element)
474{
475 /* add a child element to a parent */
476
477 if (element_resize(self, 1) < 0)
478 return -1;
479
480 Py_INCREF(element);
481 self->extra->children[self->extra->length] = element;
482
483 self->extra->length++;
484
485 return 0;
486}
487
488LOCAL(PyObject*)
489element_get_attrib(ElementObject* self)
490{
491 /* return borrowed reference to attrib dictionary */
492 /* note: this function assumes that the extra section exists */
493
494 PyObject* res = self->extra->attrib;
495
496 if (res == Py_None) {
497 /* create missing dictionary */
498 res = PyDict_New();
499 if (!res)
500 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200501 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000502 self->extra->attrib = res;
503 }
504
505 return res;
506}
507
508LOCAL(PyObject*)
509element_get_text(ElementObject* self)
510{
511 /* return borrowed reference to text attribute */
512
513 PyObject* res = self->text;
514
515 if (JOIN_GET(res)) {
516 res = JOIN_OBJ(res);
517 if (PyList_CheckExact(res)) {
518 res = list_join(res);
519 if (!res)
520 return NULL;
521 self->text = res;
522 }
523 }
524
525 return res;
526}
527
528LOCAL(PyObject*)
529element_get_tail(ElementObject* self)
530{
531 /* return borrowed reference to text attribute */
532
533 PyObject* res = self->tail;
534
535 if (JOIN_GET(res)) {
536 res = JOIN_OBJ(res);
537 if (PyList_CheckExact(res)) {
538 res = list_join(res);
539 if (!res)
540 return NULL;
541 self->tail = res;
542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
613static int
614element_gc_clear(ElementObject *self)
615{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700617 _clear_joined_ptr(&self->text);
618 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619
620 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 return 0;
625}
626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627static void
628element_dealloc(ElementObject* self)
629{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300631
632 if (self->weakreflist != NULL)
633 PyObject_ClearWeakRefs((PyObject *) self);
634
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 /* element_gc_clear clears all references and deallocates extra
636 */
637 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638
639 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200640 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000641}
642
643/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
Serhiy Storchakacb985562015-05-04 15:32:48 +0300645/*[clinic input]
646_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647
Serhiy Storchakacb985562015-05-04 15:32:48 +0300648 subelement: object(subclass_of='&Element_Type')
649 /
650
651[clinic start generated code]*/
652
653static PyObject *
654_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
655/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
656{
657 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658 return NULL;
659
660 Py_RETURN_NONE;
661}
662
Serhiy Storchakacb985562015-05-04 15:32:48 +0300663/*[clinic input]
664_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666[clinic start generated code]*/
667
668static PyObject *
669_elementtree_Element_clear_impl(ElementObject *self)
670/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
671{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300672 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
674 Py_INCREF(Py_None);
675 Py_DECREF(JOIN_OBJ(self->text));
676 self->text = Py_None;
677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->tail));
680 self->tail = Py_None;
681
682 Py_RETURN_NONE;
683}
684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685/*[clinic input]
686_elementtree.Element.__copy__
687
688[clinic start generated code]*/
689
690static PyObject *
691_elementtree_Element___copy___impl(ElementObject *self)
692/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200694 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695 ElementObject* element;
696
Eli Bendersky092af1f2012-03-04 07:14:03 +0200697 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800698 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699 if (!element)
700 return NULL;
701
702 Py_DECREF(JOIN_OBJ(element->text));
703 element->text = self->text;
704 Py_INCREF(JOIN_OBJ(element->text));
705
706 Py_DECREF(JOIN_OBJ(element->tail));
707 element->tail = self->tail;
708 Py_INCREF(JOIN_OBJ(element->tail));
709
710 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000711 if (element_resize(element, self->extra->length) < 0) {
712 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 for (i = 0; i < self->extra->length; i++) {
717 Py_INCREF(self->extra->children[i]);
718 element->extra->children[i] = self->extra->children[i];
719 }
720
721 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722 }
723
724 return (PyObject*) element;
725}
726
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200727/* Helper for a deep copy. */
728LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
729
Serhiy Storchakacb985562015-05-04 15:32:48 +0300730/*[clinic input]
731_elementtree.Element.__deepcopy__
732
733 memo: object
734 /
735
736[clinic start generated code]*/
737
738static PyObject *
739_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
740/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 ElementObject* element;
744 PyObject* tag;
745 PyObject* attrib;
746 PyObject* text;
747 PyObject* tail;
748 PyObject* id;
749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 tag = deepcopy(self->tag, memo);
751 if (!tag)
752 return NULL;
753
754 if (self->extra) {
755 attrib = deepcopy(self->extra->attrib, memo);
756 if (!attrib) {
757 Py_DECREF(tag);
758 return NULL;
759 }
760 } else {
761 Py_INCREF(Py_None);
762 attrib = Py_None;
763 }
764
Eli Bendersky092af1f2012-03-04 07:14:03 +0200765 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766
767 Py_DECREF(tag);
768 Py_DECREF(attrib);
769
770 if (!element)
771 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 text = deepcopy(JOIN_OBJ(self->text), memo);
774 if (!text)
775 goto error;
776 Py_DECREF(element->text);
777 element->text = JOIN_SET(text, JOIN_GET(self->text));
778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
782 Py_DECREF(element->tail);
783 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
784
785 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000786 if (element_resize(element, self->extra->length) < 0)
787 goto error;
788
789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* child = deepcopy(self->extra->children[i], memo);
791 if (!child) {
792 element->extra->length = i;
793 goto error;
794 }
795 element->extra->children[i] = child;
796 }
797
798 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200802 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200820LOCAL(PyObject *)
821deepcopy(PyObject *object, PyObject *memo)
822{
823 /* do a deep copy of the given object */
824 PyObject *args;
825 PyObject *result;
826 elementtreestate *st;
827
828 /* Fast paths */
829 if (object == Py_None || PyUnicode_CheckExact(object)) {
830 Py_INCREF(object);
831 return object;
832 }
833
834 if (Py_REFCNT(object) == 1) {
835 if (PyDict_CheckExact(object)) {
836 PyObject *key, *value;
837 Py_ssize_t pos = 0;
838 int simple = 1;
839 while (PyDict_Next(object, &pos, &key, &value)) {
840 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
841 simple = 0;
842 break;
843 }
844 }
845 if (simple)
846 return PyDict_Copy(object);
847 /* Fall through to general case */
848 }
849 else if (Element_CheckExact(object)) {
850 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
851 }
852 }
853
854 /* General case */
855 st = ET_STATE_GLOBAL;
856 if (!st->deepcopy_obj) {
857 PyErr_SetString(PyExc_RuntimeError,
858 "deepcopy helper not found");
859 return NULL;
860 }
861
862 args = PyTuple_Pack(2, object, memo);
863 if (!args)
864 return NULL;
865 result = PyObject_CallObject(st->deepcopy_obj, args);
866 Py_DECREF(args);
867 return result;
868}
869
870
Serhiy Storchakacb985562015-05-04 15:32:48 +0300871/*[clinic input]
872_elementtree.Element.__sizeof__ -> Py_ssize_t
873
874[clinic start generated code]*/
875
876static Py_ssize_t
877_elementtree_Element___sizeof___impl(ElementObject *self)
878/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200880 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200881 if (self->extra) {
882 result += sizeof(ElementObjectExtra);
883 if (self->extra->children != self->extra->_children)
884 result += sizeof(PyObject*) * self->extra->allocated;
885 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300886 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200887}
888
/* Keys of the state dict exchanged by __getstate__ and __setstate__. */
#define PICKLED_ATTRIB "attrib"
#define PICKLED_CHILDREN "_children"
#define PICKLED_TAG "tag"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
895
896/* __getstate__ returns a fabricated instance dict as in the pure-Python
897 * Element implementation, for interoperability/interchangeability. This
898 * makes the pure-Python implementation details an API, but (a) there aren't
899 * any unnecessary structures there; and (b) it buys compatibility with 3.2
900 * pickles. See issue #16076.
901 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300902/*[clinic input]
903_elementtree.Element.__getstate__
904
905[clinic start generated code]*/
906
Eli Bendersky698bdb22013-01-10 06:01:06 -0800907static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300908_elementtree_Element___getstate___impl(ElementObject *self)
909/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200911 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 PyObject *instancedict = NULL, *children;
913
914 /* Build a list of children. */
915 children = PyList_New(self->extra ? self->extra->length : 0);
916 if (!children)
917 return NULL;
918 for (i = 0; i < PyList_GET_SIZE(children); i++) {
919 PyObject *child = self->extra->children[i];
920 Py_INCREF(child);
921 PyList_SET_ITEM(children, i, child);
922 }
923
924 /* Construct the state object. */
925 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
926 if (noattrib)
927 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
928 PICKLED_TAG, self->tag,
929 PICKLED_CHILDREN, children,
930 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700931 PICKLED_TEXT, JOIN_OBJ(self->text),
932 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800933 else
934 instancedict = Py_BuildValue("{sOsOsOsOsO}",
935 PICKLED_TAG, self->tag,
936 PICKLED_CHILDREN, children,
937 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700938 PICKLED_TEXT, JOIN_OBJ(self->text),
939 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800940 if (instancedict) {
941 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800943 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944 else {
945 for (i = 0; i < PyList_GET_SIZE(children); i++)
946 Py_DECREF(PyList_GET_ITEM(children, i));
947 Py_DECREF(children);
948
949 return NULL;
950 }
951}
952
953static PyObject *
954element_setstate_from_attributes(ElementObject *self,
955 PyObject *tag,
956 PyObject *attrib,
957 PyObject *text,
958 PyObject *tail,
959 PyObject *children)
960{
961 Py_ssize_t i, nchildren;
962
963 if (!tag) {
964 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
965 return NULL;
966 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200968 Py_INCREF(tag);
969 Py_SETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Eli Benderskydd3661e2013-09-13 06:24:25 -0700971 _clear_joined_ptr(&self->text);
972 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
973 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974
Eli Benderskydd3661e2013-09-13 06:24:25 -0700975 _clear_joined_ptr(&self->tail);
976 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
977 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978
979 /* Handle ATTRIB and CHILDREN. */
980 if (!children && !attrib)
981 Py_RETURN_NONE;
982
983 /* Compute 'nchildren'. */
984 if (children) {
985 if (!PyList_Check(children)) {
986 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
987 return NULL;
988 }
989 nchildren = PyList_Size(children);
990 }
991 else {
992 nchildren = 0;
993 }
994
995 /* Allocate 'extra'. */
996 if (element_resize(self, nchildren)) {
997 return NULL;
998 }
999 assert(self->extra && self->extra->allocated >= nchildren);
1000
1001 /* Copy children */
1002 for (i = 0; i < nchildren; i++) {
1003 self->extra->children[i] = PyList_GET_ITEM(children, i);
1004 Py_INCREF(self->extra->children[i]);
1005 }
1006
1007 self->extra->length = nchildren;
1008 self->extra->allocated = nchildren;
1009
1010 /* Stash attrib. */
1011 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 Py_INCREF(attrib);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001013 Py_SETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001014 }
1015
1016 Py_RETURN_NONE;
1017}
1018
1019/* __setstate__ for Element instance from the Python implementation.
1020 * 'state' should be the instance dict.
1021 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001022
Eli Bendersky698bdb22013-01-10 06:01:06 -08001023static PyObject *
1024element_setstate_from_Python(ElementObject *self, PyObject *state)
1025{
1026 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1027 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1028 PyObject *args;
1029 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031
Eli Bendersky698bdb22013-01-10 06:01:06 -08001032 tag = attrib = text = tail = children = NULL;
1033 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001035 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001036
1037 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1038 &attrib, &text, &tail, &children))
1039 retval = element_setstate_from_attributes(self, tag, attrib, text,
1040 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001041 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001042 retval = NULL;
1043
1044 Py_DECREF(args);
1045 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001046}
1047
Serhiy Storchakacb985562015-05-04 15:32:48 +03001048/*[clinic input]
1049_elementtree.Element.__setstate__
1050
1051 state: object
1052 /
1053
1054[clinic start generated code]*/
1055
Eli Bendersky698bdb22013-01-10 06:01:06 -08001056static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001057_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1058/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001059{
1060 if (!PyDict_CheckExact(state)) {
1061 PyErr_Format(PyExc_TypeError,
1062 "Don't know how to unpickle \"%.200R\" as an Element",
1063 state);
1064 return NULL;
1065 }
1066 else
1067 return element_setstate_from_Python(self, state);
1068}
1069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001070LOCAL(int)
1071checkpath(PyObject* tag)
1072{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001073 Py_ssize_t i;
1074 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075
1076 /* check if a tag contains an xpath character */
1077
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001078#define PATHCHAR(ch) \
1079 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001080
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1083 void *data = PyUnicode_DATA(tag);
1084 unsigned int kind = PyUnicode_KIND(tag);
1085 for (i = 0; i < len; i++) {
1086 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1087 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001091 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 return 1;
1093 }
1094 return 0;
1095 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001096 if (PyBytes_Check(tag)) {
1097 char *p = PyBytes_AS_STRING(tag);
1098 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001099 if (p[i] == '{')
1100 check = 0;
1101 else if (p[i] == '}')
1102 check = 1;
1103 else if (check && PATHCHAR(p[i]))
1104 return 1;
1105 }
1106 return 0;
1107 }
1108
1109 return 1; /* unknown type; might be path expression */
1110}
1111
Serhiy Storchakacb985562015-05-04 15:32:48 +03001112/*[clinic input]
1113_elementtree.Element.extend
1114
1115 elements: object
1116 /
1117
1118[clinic start generated code]*/
1119
1120static PyObject *
1121_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1122/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123{
1124 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001125 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126
Serhiy Storchakacb985562015-05-04 15:32:48 +03001127 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 if (!seq) {
1129 PyErr_Format(
1130 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001131 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001132 );
1133 return NULL;
1134 }
1135
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001137 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001138 Py_INCREF(element);
1139 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001140 PyErr_Format(
1141 PyExc_TypeError,
1142 "expected an Element, not \"%.200s\"",
1143 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001144 Py_DECREF(seq);
1145 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001146 return NULL;
1147 }
1148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 if (element_add_subelement(self, element) < 0) {
1150 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001151 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001152 return NULL;
1153 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001154 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001155 }
1156
1157 Py_DECREF(seq);
1158
1159 Py_RETURN_NONE;
1160}
1161
Serhiy Storchakacb985562015-05-04 15:32:48 +03001162/*[clinic input]
1163_elementtree.Element.find
1164
1165 path: object
1166 namespaces: object = None
1167
1168[clinic start generated code]*/
1169
1170static PyObject *
1171_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1172 PyObject *namespaces)
1173/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001175 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001176 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001177
Serhiy Storchakacb985562015-05-04 15:32:48 +03001178 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001179 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001180 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001181 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001183 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184
1185 if (!self->extra)
1186 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001187
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001188 for (i = 0; i < self->extra->length; i++) {
1189 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 int rc;
1191 if (!Element_CheckExact(item))
1192 continue;
1193 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001194 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001197 Py_DECREF(item);
1198 if (rc < 0)
1199 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001200 }
1201
1202 Py_RETURN_NONE;
1203}
1204
Serhiy Storchakacb985562015-05-04 15:32:48 +03001205/*[clinic input]
1206_elementtree.Element.findtext
1207
1208 path: object
1209 default: object = None
1210 namespaces: object = None
1211
1212[clinic start generated code]*/
1213
1214static PyObject *
1215_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1216 PyObject *default_value,
1217 PyObject *namespaces)
1218/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001220 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001221 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001222 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001223
Serhiy Storchakacb985562015-05-04 15:32:48 +03001224 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001225 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001226 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227 );
1228
1229 if (!self->extra) {
1230 Py_INCREF(default_value);
1231 return default_value;
1232 }
1233
1234 for (i = 0; i < self->extra->length; i++) {
1235 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001236 int rc;
1237 if (!Element_CheckExact(item))
1238 continue;
1239 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001240 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 if (text == Py_None) {
1244 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001245 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001247 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001248 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 return text;
1250 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 Py_DECREF(item);
1252 if (rc < 0)
1253 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 }
1255
1256 Py_INCREF(default_value);
1257 return default_value;
1258}
1259
Serhiy Storchakacb985562015-05-04 15:32:48 +03001260/*[clinic input]
1261_elementtree.Element.findall
1262
1263 path: object
1264 namespaces: object = None
1265
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1270 PyObject *namespaces)
1271/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001273 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001275 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001276 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001279 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001280 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001281 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001283 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284
1285 out = PyList_New(0);
1286 if (!out)
1287 return NULL;
1288
1289 if (!self->extra)
1290 return out;
1291
1292 for (i = 0; i < self->extra->length; i++) {
1293 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001294 int rc;
1295 if (!Element_CheckExact(item))
1296 continue;
1297 Py_INCREF(item);
1298 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1299 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1300 Py_DECREF(item);
1301 Py_DECREF(out);
1302 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001304 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 }
1306
1307 return out;
1308}
1309
Serhiy Storchakacb985562015-05-04 15:32:48 +03001310/*[clinic input]
1311_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001312
Serhiy Storchakacb985562015-05-04 15:32:48 +03001313 path: object
1314 namespaces: object = None
1315
1316[clinic start generated code]*/
1317
1318static PyObject *
1319_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1320 PyObject *namespaces)
1321/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1322{
1323 PyObject* tag = path;
1324 _Py_IDENTIFIER(iterfind);
1325 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001327 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001328 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001329}
1330
Serhiy Storchakacb985562015-05-04 15:32:48 +03001331/*[clinic input]
1332_elementtree.Element.get
1333
1334 key: object
1335 default: object = None
1336
1337[clinic start generated code]*/
1338
1339static PyObject *
1340_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1341 PyObject *default_value)
1342/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343{
1344 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001345
1346 if (!self->extra || self->extra->attrib == Py_None)
1347 value = default_value;
1348 else {
1349 value = PyDict_GetItem(self->extra->attrib, key);
1350 if (!value)
1351 value = default_value;
1352 }
1353
1354 Py_INCREF(value);
1355 return value;
1356}
1357
Serhiy Storchakacb985562015-05-04 15:32:48 +03001358/*[clinic input]
1359_elementtree.Element.getchildren
1360
1361[clinic start generated code]*/
1362
1363static PyObject *
1364_elementtree_Element_getchildren_impl(ElementObject *self)
1365/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001367 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001368 PyObject* list;
1369
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001370 /* FIXME: report as deprecated? */
1371
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372 if (!self->extra)
1373 return PyList_New(0);
1374
1375 list = PyList_New(self->extra->length);
1376 if (!list)
1377 return NULL;
1378
1379 for (i = 0; i < self->extra->length; i++) {
1380 PyObject* item = self->extra->children[i];
1381 Py_INCREF(item);
1382 PyList_SET_ITEM(list, i, item);
1383 }
1384
1385 return list;
1386}
1387
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001388
Eli Bendersky64d11e62012-06-15 07:42:50 +03001389static PyObject *
1390create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1391
1392
Serhiy Storchakacb985562015-05-04 15:32:48 +03001393/*[clinic input]
1394_elementtree.Element.iter
1395
1396 tag: object = None
1397
1398[clinic start generated code]*/
1399
Eli Bendersky64d11e62012-06-15 07:42:50 +03001400static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001401_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1402/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001404 if (PyUnicode_Check(tag)) {
1405 if (PyUnicode_READY(tag) < 0)
1406 return NULL;
1407 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1408 tag = Py_None;
1409 }
1410 else if (PyBytes_Check(tag)) {
1411 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1412 tag = Py_None;
1413 }
1414
Eli Bendersky64d11e62012-06-15 07:42:50 +03001415 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001416}
1417
1418
Serhiy Storchakacb985562015-05-04 15:32:48 +03001419/*[clinic input]
1420_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001421
Serhiy Storchakacb985562015-05-04 15:32:48 +03001422[clinic start generated code]*/
1423
1424static PyObject *
1425_elementtree_Element_itertext_impl(ElementObject *self)
1426/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1427{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001428 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429}
1430
Eli Bendersky64d11e62012-06-15 07:42:50 +03001431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001433element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001435 ElementObject* self = (ElementObject*) self_;
1436
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001437 if (!self->extra || index < 0 || index >= self->extra->length) {
1438 PyErr_SetString(
1439 PyExc_IndexError,
1440 "child index out of range"
1441 );
1442 return NULL;
1443 }
1444
1445 Py_INCREF(self->extra->children[index]);
1446 return self->extra->children[index];
1447}
1448
Serhiy Storchakacb985562015-05-04 15:32:48 +03001449/*[clinic input]
1450_elementtree.Element.insert
1451
1452 index: Py_ssize_t
1453 subelement: object(subclass_of='&Element_Type')
1454 /
1455
1456[clinic start generated code]*/
1457
1458static PyObject *
1459_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1460 PyObject *subelement)
1461/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001463 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464
Victor Stinner5f0af232013-07-11 23:01:36 +02001465 if (!self->extra) {
1466 if (create_extra(self, NULL) < 0)
1467 return NULL;
1468 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001470 if (index < 0) {
1471 index += self->extra->length;
1472 if (index < 0)
1473 index = 0;
1474 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001475 if (index > self->extra->length)
1476 index = self->extra->length;
1477
1478 if (element_resize(self, 1) < 0)
1479 return NULL;
1480
1481 for (i = self->extra->length; i > index; i--)
1482 self->extra->children[i] = self->extra->children[i-1];
1483
Serhiy Storchakacb985562015-05-04 15:32:48 +03001484 Py_INCREF(subelement);
1485 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486
1487 self->extra->length++;
1488
1489 Py_RETURN_NONE;
1490}
1491
Serhiy Storchakacb985562015-05-04 15:32:48 +03001492/*[clinic input]
1493_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494
Serhiy Storchakacb985562015-05-04 15:32:48 +03001495[clinic start generated code]*/
1496
1497static PyObject *
1498_elementtree_Element_items_impl(ElementObject *self)
1499/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1500{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001501 if (!self->extra || self->extra->attrib == Py_None)
1502 return PyList_New(0);
1503
1504 return PyDict_Items(self->extra->attrib);
1505}
1506
Serhiy Storchakacb985562015-05-04 15:32:48 +03001507/*[clinic input]
1508_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509
Serhiy Storchakacb985562015-05-04 15:32:48 +03001510[clinic start generated code]*/
1511
1512static PyObject *
1513_elementtree_Element_keys_impl(ElementObject *self)
1514/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1515{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516 if (!self->extra || self->extra->attrib == Py_None)
1517 return PyList_New(0);
1518
1519 return PyDict_Keys(self->extra->attrib);
1520}
1521
Martin v. Löwis18e16552006-02-15 17:27:45 +00001522static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001523element_length(ElementObject* self)
1524{
1525 if (!self->extra)
1526 return 0;
1527
1528 return self->extra->length;
1529}
1530
Serhiy Storchakacb985562015-05-04 15:32:48 +03001531/*[clinic input]
1532_elementtree.Element.makeelement
1533
1534 tag: object
1535 attrib: object
1536 /
1537
1538[clinic start generated code]*/
1539
1540static PyObject *
1541_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1542 PyObject *attrib)
1543/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544{
1545 PyObject* elem;
1546
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547 attrib = PyDict_Copy(attrib);
1548 if (!attrib)
1549 return NULL;
1550
Eli Bendersky092af1f2012-03-04 07:14:03 +02001551 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552
1553 Py_DECREF(attrib);
1554
1555 return elem;
1556}
1557
Serhiy Storchakacb985562015-05-04 15:32:48 +03001558/*[clinic input]
1559_elementtree.Element.remove
1560
1561 subelement: object(subclass_of='&Element_Type')
1562 /
1563
1564[clinic start generated code]*/
1565
1566static PyObject *
1567_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1568/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001570 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001571 int rc;
1572 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 if (!self->extra) {
1575 /* element has no children, so raise exception */
1576 PyErr_SetString(
1577 PyExc_ValueError,
1578 "list.remove(x): x not in list"
1579 );
1580 return NULL;
1581 }
1582
1583 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001584 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001586 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001587 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001588 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001589 if (rc < 0)
1590 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591 }
1592
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001593 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001594 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595 PyErr_SetString(
1596 PyExc_ValueError,
1597 "list.remove(x): x not in list"
1598 );
1599 return NULL;
1600 }
1601
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001602 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603
1604 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605 for (; i < self->extra->length; i++)
1606 self->extra->children[i] = self->extra->children[i+1];
1607
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001608 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609 Py_RETURN_NONE;
1610}
1611
1612static PyObject*
1613element_repr(ElementObject* self)
1614{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001615 if (self->tag)
1616 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1617 else
1618 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619}
1620
Serhiy Storchakacb985562015-05-04 15:32:48 +03001621/*[clinic input]
1622_elementtree.Element.set
1623
1624 key: object
1625 value: object
1626 /
1627
1628[clinic start generated code]*/
1629
1630static PyObject *
1631_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1632 PyObject *value)
1633/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634{
1635 PyObject* attrib;
1636
Victor Stinner5f0af232013-07-11 23:01:36 +02001637 if (!self->extra) {
1638 if (create_extra(self, NULL) < 0)
1639 return NULL;
1640 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641
1642 attrib = element_get_attrib(self);
1643 if (!attrib)
1644 return NULL;
1645
1646 if (PyDict_SetItem(attrib, key, value) < 0)
1647 return NULL;
1648
1649 Py_RETURN_NONE;
1650}
1651
1652static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001653element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001654{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001655 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001656 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657 PyObject* old;
1658
1659 if (!self->extra || index < 0 || index >= self->extra->length) {
1660 PyErr_SetString(
1661 PyExc_IndexError,
1662 "child assignment index out of range");
1663 return -1;
1664 }
1665
1666 old = self->extra->children[index];
1667
1668 if (item) {
1669 Py_INCREF(item);
1670 self->extra->children[index] = item;
1671 } else {
1672 self->extra->length--;
1673 for (i = index; i < self->extra->length; i++)
1674 self->extra->children[i] = self->extra->children[i+1];
1675 }
1676
1677 Py_DECREF(old);
1678
1679 return 0;
1680}
1681
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001682static PyObject*
1683element_subscr(PyObject* self_, PyObject* item)
1684{
1685 ElementObject* self = (ElementObject*) self_;
1686
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001687 if (PyIndex_Check(item)) {
1688 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001689
1690 if (i == -1 && PyErr_Occurred()) {
1691 return NULL;
1692 }
1693 if (i < 0 && self->extra)
1694 i += self->extra->length;
1695 return element_getitem(self_, i);
1696 }
1697 else if (PySlice_Check(item)) {
1698 Py_ssize_t start, stop, step, slicelen, cur, i;
1699 PyObject* list;
1700
1701 if (!self->extra)
1702 return PyList_New(0);
1703
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001704 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001705 self->extra->length,
1706 &start, &stop, &step, &slicelen) < 0) {
1707 return NULL;
1708 }
1709
1710 if (slicelen <= 0)
1711 return PyList_New(0);
1712 else {
1713 list = PyList_New(slicelen);
1714 if (!list)
1715 return NULL;
1716
1717 for (cur = start, i = 0; i < slicelen;
1718 cur += step, i++) {
1719 PyObject* item = self->extra->children[cur];
1720 Py_INCREF(item);
1721 PyList_SET_ITEM(list, i, item);
1722 }
1723
1724 return list;
1725 }
1726 }
1727 else {
1728 PyErr_SetString(PyExc_TypeError,
1729 "element indices must be integers");
1730 return NULL;
1731 }
1732}
1733
1734static int
1735element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1736{
1737 ElementObject* self = (ElementObject*) self_;
1738
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001739 if (PyIndex_Check(item)) {
1740 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001741
1742 if (i == -1 && PyErr_Occurred()) {
1743 return -1;
1744 }
1745 if (i < 0 && self->extra)
1746 i += self->extra->length;
1747 return element_setitem(self_, i, value);
1748 }
1749 else if (PySlice_Check(item)) {
1750 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1751
1752 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001753 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001754
Victor Stinner5f0af232013-07-11 23:01:36 +02001755 if (!self->extra) {
1756 if (create_extra(self, NULL) < 0)
1757 return -1;
1758 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001759
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001760 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001761 self->extra->length,
1762 &start, &stop, &step, &slicelen) < 0) {
1763 return -1;
1764 }
1765
Eli Bendersky865756a2012-03-09 13:38:15 +02001766 if (value == NULL) {
1767 /* Delete slice */
1768 size_t cur;
1769 Py_ssize_t i;
1770
1771 if (slicelen <= 0)
1772 return 0;
1773
1774 /* Since we're deleting, the direction of the range doesn't matter,
1775 * so for simplicity make it always ascending.
1776 */
1777 if (step < 0) {
1778 stop = start + 1;
1779 start = stop + step * (slicelen - 1) - 1;
1780 step = -step;
1781 }
1782
1783 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1784
1785 /* recycle is a list that will contain all the children
1786 * scheduled for removal.
1787 */
1788 if (!(recycle = PyList_New(slicelen))) {
1789 PyErr_NoMemory();
1790 return -1;
1791 }
1792
1793 /* This loop walks over all the children that have to be deleted,
1794 * with cur pointing at them. num_moved is the amount of children
1795 * until the next deleted child that have to be "shifted down" to
1796 * occupy the deleted's places.
1797 * Note that in the ith iteration, shifting is done i+i places down
1798 * because i children were already removed.
1799 */
1800 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1801 /* Compute how many children have to be moved, clipping at the
1802 * list end.
1803 */
1804 Py_ssize_t num_moved = step - 1;
1805 if (cur + step >= (size_t)self->extra->length) {
1806 num_moved = self->extra->length - cur - 1;
1807 }
1808
1809 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1810
1811 memmove(
1812 self->extra->children + cur - i,
1813 self->extra->children + cur + 1,
1814 num_moved * sizeof(PyObject *));
1815 }
1816
1817 /* Leftover "tail" after the last removed child */
1818 cur = start + (size_t)slicelen * step;
1819 if (cur < (size_t)self->extra->length) {
1820 memmove(
1821 self->extra->children + cur - slicelen,
1822 self->extra->children + cur,
1823 (self->extra->length - cur) * sizeof(PyObject *));
1824 }
1825
1826 self->extra->length -= slicelen;
1827
1828 /* Discard the recycle list with all the deleted sub-elements */
1829 Py_XDECREF(recycle);
1830 return 0;
1831 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001832
1833 /* A new slice is actually being assigned */
1834 seq = PySequence_Fast(value, "");
1835 if (!seq) {
1836 PyErr_Format(
1837 PyExc_TypeError,
1838 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1839 );
1840 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001842 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001843
1844 if (step != 1 && newlen != slicelen)
1845 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001846 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001847 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001848 "attempt to assign sequence of size %zd "
1849 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001850 newlen, slicelen
1851 );
1852 return -1;
1853 }
1854
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 /* Resize before creating the recycle bin, to prevent refleaks. */
1856 if (newlen > slicelen) {
1857 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001858 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 return -1;
1860 }
1861 }
1862
1863 if (slicelen > 0) {
1864 /* to avoid recursive calls to this method (via decref), move
1865 old items to the recycle bin here, and get rid of them when
1866 we're done modifying the element */
1867 recycle = PyList_New(slicelen);
1868 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870 return -1;
1871 }
1872 for (cur = start, i = 0; i < slicelen;
1873 cur += step, i++)
1874 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1875 }
1876
1877 if (newlen < slicelen) {
1878 /* delete slice */
1879 for (i = stop; i < self->extra->length; i++)
1880 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1881 } else if (newlen > slicelen) {
1882 /* insert slice */
1883 for (i = self->extra->length-1; i >= stop; i--)
1884 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1885 }
1886
1887 /* replace the slice */
1888 for (cur = start, i = 0; i < newlen;
1889 cur += step, i++) {
1890 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1891 Py_INCREF(element);
1892 self->extra->children[cur] = element;
1893 }
1894
1895 self->extra->length += newlen - slicelen;
1896
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001897 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001898
1899 /* discard the recycle bin, and everything in it */
1900 Py_XDECREF(recycle);
1901
1902 return 0;
1903 }
1904 else {
1905 PyErr_SetString(PyExc_TypeError,
1906 "element indices must be integers");
1907 return -1;
1908 }
1909}
1910
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001911static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001912element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001913{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001914 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001915 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001916 return res;
1917}
1918
Serhiy Storchakadde08152015-11-25 15:28:13 +02001919static PyObject*
1920element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001921{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001922 PyObject *res = element_get_text(self);
1923 Py_XINCREF(res);
1924 return res;
1925}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001926
Serhiy Storchakadde08152015-11-25 15:28:13 +02001927static PyObject*
1928element_tail_getter(ElementObject *self, void *closure)
1929{
1930 PyObject *res = element_get_tail(self);
1931 Py_XINCREF(res);
1932 return res;
1933}
1934
1935static PyObject*
1936element_attrib_getter(ElementObject *self, void *closure)
1937{
1938 PyObject *res;
1939 if (!self->extra) {
1940 if (create_extra(self, NULL) < 0)
1941 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001942 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001943 res = element_get_attrib(self);
1944 Py_XINCREF(res);
1945 return res;
1946}
Victor Stinner4d463432013-07-11 23:05:03 +02001947
/* Macro for setter validation: a setter receives V == NULL when the
 * attribute is being deleted, which elements do not allow.  In that case
 * AttributeError is set and the *enclosing function* returns -1.
 *
 * Wrapped in do { } while (0) so that `_VALIDATE_ATTR_VALUE(v);` is a single
 * statement and cannot capture a following `else`. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1956
Serhiy Storchakadde08152015-11-25 15:28:13 +02001957static int
1958element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1959{
1960 _VALIDATE_ATTR_VALUE(value);
1961 Py_INCREF(value);
Serhiy Storchaka726fc132015-12-27 15:44:33 +02001962 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001963 return 0;
1964}
1965
1966static int
1967element_text_setter(ElementObject *self, PyObject *value, void *closure)
1968{
1969 _VALIDATE_ATTR_VALUE(value);
1970 Py_INCREF(value);
1971 Py_DECREF(JOIN_OBJ(self->text));
1972 self->text = value;
1973 return 0;
1974}
1975
1976static int
1977element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1978{
1979 _VALIDATE_ATTR_VALUE(value);
1980 Py_INCREF(value);
1981 Py_DECREF(JOIN_OBJ(self->tail));
1982 self->tail = value;
1983 return 0;
1984}
1985
1986static int
1987element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1988{
1989 _VALIDATE_ATTR_VALUE(value);
1990 if (!self->extra) {
1991 if (create_extra(self, NULL) < 0)
1992 return -1;
1993 }
1994 Py_INCREF(value);
Serhiy Storchaka726fc132015-12-27 15:44:33 +02001995 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07001996 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001997}
1998
/* Sequence protocol table for elements: length, item lookup and item
 * assignment/deletion are provided; concatenation and repetition are not. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length,       /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem,                /* sq_item */
    0,                              /* was_sq_slice (unused placeholder) */
    element_setitem,                /* sq_ass_item */
    0,                              /* was_sq_ass_slice (unused placeholder) */
};
2008
Eli Bendersky64d11e62012-06-15 07:42:50 +03002009/******************************* Element iterator ****************************/
2010
2011/* ElementIterObject represents the iteration state over an XML element in
2012 * pre-order traversal. To keep track of which sub-element should be returned
2013 * next, a stack of parents is maintained. This is a standard stack-based
2014 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2016 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002017 * the current one is exhausted, and the next child to examine in that parent.
2018 */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked;
                                   the stack entry holds a reference to it */
    Py_ssize_t child_index;     /* index of the next child of `parent` to
                                   visit */
} ParentLocator;
2023
/* Iterator state shared by the element and text iteration modes
 * (see elementiter_next). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* array of stack entries */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* element to start from; set to NULL
                                       once iteration has consumed it */
    PyObject *sought_tag;           /* tag to match; Py_None matches every
                                       element */
    int gettext;                    /* non-zero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2033
2034
2035static void
2036elementiter_dealloc(ElementIterObject *it)
2037{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002038 Py_ssize_t i = it->parent_stack_used;
2039 it->parent_stack_used = 0;
2040 while (i--)
2041 Py_XDECREF(it->parent_stack[i].parent);
2042 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002043
2044 Py_XDECREF(it->sought_tag);
2045 Py_XDECREF(it->root_element);
2046
2047 PyObject_GC_UnTrack(it);
2048 PyObject_GC_Del(it);
2049}
2050
2051static int
2052elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2053{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002054 Py_ssize_t i = it->parent_stack_used;
2055 while (i--)
2056 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002057
2058 Py_VISIT(it->root_element);
2059 Py_VISIT(it->sought_tag);
2060 return 0;
2061}
2062
2063/* Helper function for elementiter_next. Add a new parent to the parent stack.
2064 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002065static int
2066parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002067{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002068 ParentLocator *item;
2069
2070 if (it->parent_stack_used >= it->parent_stack_size) {
2071 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2072 ParentLocator *parent_stack = it->parent_stack;
2073 PyMem_Resize(parent_stack, ParentLocator, new_size);
2074 if (parent_stack == NULL)
2075 return -1;
2076 it->parent_stack = parent_stack;
2077 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002078 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002079 item = it->parent_stack + it->parent_stack_used++;
2080 Py_INCREF(parent);
2081 item->parent = parent;
2082 item->child_index = 0;
2083 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002084}
2085
2086static PyObject *
2087elementiter_next(ElementIterObject *it)
2088{
2089 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002090 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002091 * A short note on gettext: this function serves both the iter() and
2092 * itertext() methods to avoid code duplication. However, there are a few
2093 * small differences in the way these iterations work. Namely:
2094 * - itertext() only yields text from nodes that have it, and continues
2095 * iterating when a node doesn't have text (so it doesn't return any
2096 * node like iter())
2097 * - itertext() also has to handle tail, after finishing with all the
2098 * children of a node.
2099 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002100 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002101 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002102 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002103
2104 while (1) {
2105 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002106 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002107 * iterator is exhausted.
2108 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002109 if (!it->parent_stack_used) {
2110 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111 PyErr_SetNone(PyExc_StopIteration);
2112 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002113 }
2114
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002115 elem = it->root_element; /* steals a reference */
2116 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002117 }
2118 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002119 /* See if there are children left to traverse in the current parent. If
2120 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002122 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2123 Py_ssize_t child_index = item->child_index;
2124 ElementObjectExtra *extra;
2125 elem = item->parent;
2126 extra = elem->extra;
2127 if (!extra || child_index >= extra->length) {
2128 it->parent_stack_used--;
2129 /* Note that extra condition on it->parent_stack_used here;
2130 * this is because itertext() is supposed to only return *inner*
2131 * text, not text following the element it began iteration with.
2132 */
2133 if (it->gettext && it->parent_stack_used) {
2134 text = element_get_tail(elem);
2135 goto gettext;
2136 }
2137 Py_DECREF(elem);
2138 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002139 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002140
2141 elem = (ElementObject *)extra->children[child_index];
2142 item->child_index++;
2143 Py_INCREF(elem);
2144 }
2145
2146 if (parent_stack_push_new(it, elem) < 0) {
2147 Py_DECREF(elem);
2148 PyErr_NoMemory();
2149 return NULL;
2150 }
2151 if (it->gettext) {
2152 text = element_get_text(elem);
2153 goto gettext;
2154 }
2155
2156 if (it->sought_tag == Py_None)
2157 return (PyObject *)elem;
2158
2159 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2160 if (rc > 0)
2161 return (PyObject *)elem;
2162
2163 Py_DECREF(elem);
2164 if (rc < 0)
2165 return NULL;
2166 continue;
2167
2168gettext:
2169 if (!text) {
2170 Py_DECREF(elem);
2171 return NULL;
2172 }
2173 if (text == Py_None) {
2174 Py_DECREF(elem);
2175 }
2176 else {
2177 Py_INCREF(text);
2178 Py_DECREF(elem);
2179 rc = PyObject_IsTrue(text);
2180 if (rc > 0)
2181 return text;
2182 Py_DECREF(text);
2183 if (rc < 0)
2184 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002185 }
2186 }
2187
2188 return NULL;
2189}
2190
2191
/* Type object for the iterator built by create_elementiter().  Instances are
 * GC-enabled, are their own iterator (PyObject_SelfIter) and produce values
 * through elementiter_next().  The type defines no methods, members or
 * constructor slots. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2235
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002236#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002237
2238static PyObject *
2239create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2240{
2241 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002242
2243 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2244 if (!it)
2245 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002246
Victor Stinner4d463432013-07-11 23:05:03 +02002247 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002248 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002249 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002250 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002251 it->root_element = self;
2252
Eli Bendersky64d11e62012-06-15 07:42:50 +03002253 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002254
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002255 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002256 if (it->parent_stack == NULL) {
2257 Py_DECREF(it);
2258 PyErr_NoMemory();
2259 return NULL;
2260 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002261 it->parent_stack_used = 0;
2262 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002263
Eli Bendersky64d11e62012-06-15 07:42:50 +03002264 return (PyObject *)it;
2265}
2266
2267
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268/* ==================================================================== */
2269/* the tree builder type */
2270
/* State of a tree builder: the tree under construction, pending character
 * data, the stack of open elements and the optional event-reporting hooks. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* callable used to create nodes; NULL or
                                  Py_None selects the built-in element */

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2293
/* True when op's type is exactly TreeBuilder_Type (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295
2296/* -------------------------------------------------------------------- */
2297/* constructor and destructor */
2298
Eli Bendersky58d548d2012-05-29 15:45:16 +03002299static PyObject *
2300treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002301{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002302 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2303 if (t != NULL) {
2304 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002307 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002308 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002309 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310
Eli Bendersky58d548d2012-05-29 15:45:16 +03002311 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002312 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002313 t->stack = PyList_New(20);
2314 if (!t->stack) {
2315 Py_DECREF(t->this);
2316 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002317 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002318 return NULL;
2319 }
2320 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002322 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 t->start_event_obj = t->end_event_obj = NULL;
2324 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2325 }
2326 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002327}
2328
Serhiy Storchakacb985562015-05-04 15:32:48 +03002329/*[clinic input]
2330_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002331
Serhiy Storchakacb985562015-05-04 15:32:48 +03002332 element_factory: object = NULL
2333
2334[clinic start generated code]*/
2335
2336static int
2337_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2338 PyObject *element_factory)
2339/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2340{
Eli Bendersky48d358b2012-05-30 17:57:50 +03002341 if (element_factory) {
2342 Py_INCREF(element_factory);
Serhiy Storchaka1ed017a2015-12-27 15:51:32 +02002343 Py_SETREF(self->element_factory, element_factory);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002344 }
2345
Eli Bendersky58d548d2012-05-29 15:45:16 +03002346 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347}
2348
Eli Bendersky48d358b2012-05-30 17:57:50 +03002349static int
2350treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2351{
2352 Py_VISIT(self->root);
2353 Py_VISIT(self->this);
2354 Py_VISIT(self->last);
2355 Py_VISIT(self->data);
2356 Py_VISIT(self->stack);
2357 Py_VISIT(self->element_factory);
2358 return 0;
2359}
2360
/* Drops every object reference held by the builder and resets the fields to
 * NULL.  Safe to call more than once: Py_CLEAR ignores fields that are
 * already NULL.  Also used by treebuilder_dealloc.  Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002377
/* Deallocator for tree builders: removes the object from GC tracking,
 * releases everything it references, then frees the memory through the
 * type's tp_free. */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    /* untrack first, before any reference is dropped */
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2385
2386/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002387/* helpers for handling of arbitrary element-like objects */
2388
2389static int
2390treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2391 PyObject **dest, _Py_Identifier *name)
2392{
2393 if (Element_CheckExact(element)) {
2394 Py_DECREF(JOIN_OBJ(*dest));
2395 *dest = JOIN_SET(data, PyList_CheckExact(data));
2396 return 0;
2397 }
2398 else {
2399 PyObject *joined = list_join(data);
2400 int r;
2401 if (joined == NULL)
2402 return -1;
2403 r = _PyObject_SetAttrId(element, name, joined);
2404 Py_DECREF(joined);
2405 return r;
2406 }
2407}
2408
2409/* These two functions steal a reference to data */
2410static int
2411treebuilder_set_element_text(PyObject *element, PyObject *data)
2412{
2413 _Py_IDENTIFIER(text);
2414 return treebuilder_set_element_text_or_tail(
2415 element, data, &((ElementObject *) element)->text, &PyId_text);
2416}
2417
2418static int
2419treebuilder_set_element_tail(PyObject *element, PyObject *data)
2420{
2421 _Py_IDENTIFIER(tail);
2422 return treebuilder_set_element_text_or_tail(
2423 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2424}
2425
2426static int
2427treebuilder_add_subelement(PyObject *element, PyObject *child)
2428{
2429 _Py_IDENTIFIER(append);
2430 if (Element_CheckExact(element)) {
2431 ElementObject *elem = (ElementObject *) element;
2432 return element_add_subelement(elem, child);
2433 }
2434 else {
2435 PyObject *res;
2436 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2437 if (res == NULL)
2438 return -1;
2439 Py_DECREF(res);
2440 return 0;
2441 }
2442}
2443
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002444LOCAL(int)
2445treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2446 PyObject *node)
2447{
2448 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002449 PyObject *res;
2450 PyObject *event = PyTuple_Pack(2, action, node);
2451 if (event == NULL)
2452 return -1;
2453 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2454 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002455 if (res == NULL)
2456 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002457 Py_DECREF(res);
2458 }
2459 return 0;
2460}
2461
Antoine Pitrouee329312012-10-04 19:53:29 +02002462/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002463/* handlers */
2464
2465LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002466treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2467 PyObject* attrib)
2468{
2469 PyObject* node;
2470 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002471 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472
2473 if (self->data) {
2474 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002475 if (treebuilder_set_element_text(self->last, self->data))
2476 return NULL;
2477 }
2478 else {
2479 if (treebuilder_set_element_tail(self->last, self->data))
2480 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481 }
2482 self->data = NULL;
2483 }
2484
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002485 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002486 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002487 } else if (attrib == Py_None) {
2488 attrib = PyDict_New();
2489 if (!attrib)
2490 return NULL;
2491 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2492 Py_DECREF(attrib);
2493 }
2494 else {
2495 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002496 }
2497 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002499 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500
Antoine Pitrouee329312012-10-04 19:53:29 +02002501 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502
2503 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002504 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002505 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 } else {
2507 if (self->root) {
2508 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002509 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510 "multiple elements on top level"
2511 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002512 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 }
2514 Py_INCREF(node);
2515 self->root = node;
2516 }
2517
2518 if (self->index < PyList_GET_SIZE(self->stack)) {
2519 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002520 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 Py_INCREF(this);
2522 } else {
2523 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002524 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525 }
2526 self->index++;
2527
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528 Py_INCREF(node);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002529 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 Py_INCREF(node);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002531 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002533 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2534 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535
2536 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002537
2538 error:
2539 Py_DECREF(node);
2540 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002541}
2542
2543LOCAL(PyObject*)
2544treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2545{
2546 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002547 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002548 /* ignore calls to data before the first call to start */
2549 Py_RETURN_NONE;
2550 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 /* store the first item as is */
2552 Py_INCREF(data); self->data = data;
2553 } else {
2554 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002555 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2556 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002557 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002558 /* expat often generates single character data sections; handle
2559 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002560 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2561 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002563 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564 } else if (PyList_CheckExact(self->data)) {
2565 if (PyList_Append(self->data, data) < 0)
2566 return NULL;
2567 } else {
2568 PyObject* list = PyList_New(2);
2569 if (!list)
2570 return NULL;
2571 PyList_SET_ITEM(list, 0, self->data);
2572 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2573 self->data = list;
2574 }
2575 }
2576
2577 Py_RETURN_NONE;
2578}
2579
2580LOCAL(PyObject*)
2581treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2582{
2583 PyObject* item;
2584
2585 if (self->data) {
2586 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002587 if (treebuilder_set_element_text(self->last, self->data))
2588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002590 if (treebuilder_set_element_tail(self->last, self->data))
2591 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002592 }
2593 self->data = NULL;
2594 }
2595
2596 if (self->index == 0) {
2597 PyErr_SetString(
2598 PyExc_IndexError,
2599 "pop from empty stack"
2600 );
2601 return NULL;
2602 }
2603
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002604 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002605 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002606 self->index--;
2607 self->this = PyList_GET_ITEM(self->stack, self->index);
2608 Py_INCREF(self->this);
2609 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002611 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2612 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002613
2614 Py_INCREF(self->last);
2615 return (PyObject*) self->last;
2616}
2617
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618/* -------------------------------------------------------------------- */
2619/* methods (in alphabetical order) */
2620
Serhiy Storchakacb985562015-05-04 15:32:48 +03002621/*[clinic input]
2622_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623
Serhiy Storchakacb985562015-05-04 15:32:48 +03002624 data: object
2625 /
2626
2627[clinic start generated code]*/
2628
2629static PyObject *
2630_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2631/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2632{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633 return treebuilder_handle_data(self, data);
2634}
2635
Serhiy Storchakacb985562015-05-04 15:32:48 +03002636/*[clinic input]
2637_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638
Serhiy Storchakacb985562015-05-04 15:32:48 +03002639 tag: object
2640 /
2641
2642[clinic start generated code]*/
2643
2644static PyObject *
2645_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2646/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2647{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648 return treebuilder_handle_end(self, tag);
2649}
2650
2651LOCAL(PyObject*)
2652treebuilder_done(TreeBuilderObject* self)
2653{
2654 PyObject* res;
2655
2656 /* FIXME: check stack size? */
2657
2658 if (self->root)
2659 res = self->root;
2660 else
2661 res = Py_None;
2662
2663 Py_INCREF(res);
2664 return res;
2665}
2666
Serhiy Storchakacb985562015-05-04 15:32:48 +03002667/*[clinic input]
2668_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002669
Serhiy Storchakacb985562015-05-04 15:32:48 +03002670[clinic start generated code]*/
2671
2672static PyObject *
2673_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2674/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2675{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676 return treebuilder_done(self);
2677}
2678
Serhiy Storchakacb985562015-05-04 15:32:48 +03002679/*[clinic input]
2680_elementtree.TreeBuilder.start
2681
2682 tag: object
2683 attrs: object = None
2684 /
2685
2686[clinic start generated code]*/
2687
2688static PyObject *
2689_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2690 PyObject *attrs)
2691/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002692{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002693 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694}
2695
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696/* ==================================================================== */
2697/* the expat interface */
2698
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002699#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002701
2702/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2703 * cached globally without being in per-module state.
2704 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002705static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707
Eli Bendersky52467b12012-06-01 07:13:08 +03002708static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2709 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2710
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711typedef struct {
2712 PyObject_HEAD
2713
2714 XML_Parser parser;
2715
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002716 PyObject *target;
2717 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002718
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002719 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002721 PyObject *handle_start;
2722 PyObject *handle_data;
2723 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002725 PyObject *handle_comment;
2726 PyObject *handle_pi;
2727 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002729 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002730
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731} XMLParserObject;
2732
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002733static PyObject*
2734_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2735static PyObject *
2736_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2737 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739/* helpers */
2740
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741LOCAL(PyObject*)
2742makeuniversal(XMLParserObject* self, const char* string)
2743{
2744 /* convert a UTF-8 tag/attribute name from the expat parser
2745 to a universal name string */
2746
Antoine Pitrouc1948842012-10-01 23:40:37 +02002747 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748 PyObject* key;
2749 PyObject* value;
2750
2751 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002752 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753 if (!key)
2754 return NULL;
2755
2756 value = PyDict_GetItem(self->names, key);
2757
2758 if (value) {
2759 Py_INCREF(value);
2760 } else {
2761 /* new name. convert to universal name, and decode as
2762 necessary */
2763
2764 PyObject* tag;
2765 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002766 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767
2768 /* look for namespace separator */
2769 for (i = 0; i < size; i++)
2770 if (string[i] == '}')
2771 break;
2772 if (i != size) {
2773 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002774 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002775 if (tag == NULL) {
2776 Py_DECREF(key);
2777 return NULL;
2778 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002779 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 p[0] = '{';
2781 memcpy(p+1, string, size);
2782 size++;
2783 } else {
2784 /* plain name; use key as tag */
2785 Py_INCREF(key);
2786 tag = key;
2787 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002788
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002789 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002790 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002791 value = PyUnicode_DecodeUTF8(p, size, "strict");
2792 Py_DECREF(tag);
2793 if (!value) {
2794 Py_DECREF(key);
2795 return NULL;
2796 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797
2798 /* add to names dictionary */
2799 if (PyDict_SetItem(self->names, key, value) < 0) {
2800 Py_DECREF(key);
2801 Py_DECREF(value);
2802 return NULL;
2803 }
2804 }
2805
2806 Py_DECREF(key);
2807 return value;
2808}
2809
Eli Bendersky5b77d812012-03-16 08:20:05 +02002810/* Set the ParseError exception with the given parameters.
2811 * If message is not NULL, it's used as the error string. Otherwise, the
2812 * message string is the default for the given error_code.
2813*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002814static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002815expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2816 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002817{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002818 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002819 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002820
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002821 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002822 message ? message : EXPAT(ErrorString)(error_code),
2823 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002824 if (errmsg == NULL)
2825 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002826
Eli Bendersky532d03e2013-08-10 08:00:39 -07002827 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002828 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002829 if (!error)
2830 return;
2831
Eli Bendersky5b77d812012-03-16 08:20:05 +02002832 /* Add code and position attributes */
2833 code = PyLong_FromLong((long)error_code);
2834 if (!code) {
2835 Py_DECREF(error);
2836 return;
2837 }
2838 if (PyObject_SetAttrString(error, "code", code) == -1) {
2839 Py_DECREF(error);
2840 Py_DECREF(code);
2841 return;
2842 }
2843 Py_DECREF(code);
2844
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002845 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002846 if (!position) {
2847 Py_DECREF(error);
2848 return;
2849 }
2850 if (PyObject_SetAttrString(error, "position", position) == -1) {
2851 Py_DECREF(error);
2852 Py_DECREF(position);
2853 return;
2854 }
2855 Py_DECREF(position);
2856
Eli Bendersky532d03e2013-08-10 08:00:39 -07002857 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002858 Py_DECREF(error);
2859}
2860
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002861/* -------------------------------------------------------------------- */
2862/* handlers */
2863
2864static void
2865expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2866 int data_len)
2867{
2868 PyObject* key;
2869 PyObject* value;
2870 PyObject* res;
2871
2872 if (data_len < 2 || data_in[0] != '&')
2873 return;
2874
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002875 if (PyErr_Occurred())
2876 return;
2877
Neal Norwitz0269b912007-08-08 06:56:02 +00002878 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002879 if (!key)
2880 return;
2881
2882 value = PyDict_GetItem(self->entity, key);
2883
2884 if (value) {
2885 if (TreeBuilder_CheckExact(self->target))
2886 res = treebuilder_handle_data(
2887 (TreeBuilderObject*) self->target, value
2888 );
2889 else if (self->handle_data)
2890 res = PyObject_CallFunction(self->handle_data, "O", value);
2891 else
2892 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002893 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002894 } else if (!PyErr_Occurred()) {
2895 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002896 char message[128] = "undefined entity ";
2897 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002898 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002899 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002900 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002901 EXPAT(GetErrorColumnNumber)(self->parser),
2902 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002903 );
2904 }
2905
2906 Py_DECREF(key);
2907}
2908
2909static void
2910expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2911 const XML_Char **attrib_in)
2912{
2913 PyObject* res;
2914 PyObject* tag;
2915 PyObject* attrib;
2916 int ok;
2917
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002918 if (PyErr_Occurred())
2919 return;
2920
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002921 /* tag name */
2922 tag = makeuniversal(self, tag_in);
2923 if (!tag)
2924 return; /* parser will look for errors */
2925
2926 /* attributes */
2927 if (attrib_in[0]) {
2928 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002929 if (!attrib) {
2930 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002932 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 while (attrib_in[0] && attrib_in[1]) {
2934 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002935 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 if (!key || !value) {
2937 Py_XDECREF(value);
2938 Py_XDECREF(key);
2939 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002940 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941 return;
2942 }
2943 ok = PyDict_SetItem(attrib, key, value);
2944 Py_DECREF(value);
2945 Py_DECREF(key);
2946 if (ok < 0) {
2947 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002948 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 return;
2950 }
2951 attrib_in += 2;
2952 }
2953 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002954 Py_INCREF(Py_None);
2955 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002956 }
2957
2958 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959 /* shortcut */
2960 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2961 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002962 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002963 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002964 if (attrib == Py_None) {
2965 Py_DECREF(attrib);
2966 attrib = PyDict_New();
2967 if (!attrib) {
2968 Py_DECREF(tag);
2969 return;
2970 }
2971 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002972 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002973 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 res = NULL;
2975
2976 Py_DECREF(tag);
2977 Py_DECREF(attrib);
2978
2979 Py_XDECREF(res);
2980}
2981
2982static void
2983expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2984 int data_len)
2985{
2986 PyObject* data;
2987 PyObject* res;
2988
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002989 if (PyErr_Occurred())
2990 return;
2991
Neal Norwitz0269b912007-08-08 06:56:02 +00002992 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002993 if (!data)
2994 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995
2996 if (TreeBuilder_CheckExact(self->target))
2997 /* shortcut */
2998 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2999 else if (self->handle_data)
3000 res = PyObject_CallFunction(self->handle_data, "O", data);
3001 else
3002 res = NULL;
3003
3004 Py_DECREF(data);
3005
3006 Py_XDECREF(res);
3007}
3008
3009static void
3010expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3011{
3012 PyObject* tag;
3013 PyObject* res = NULL;
3014
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003015 if (PyErr_Occurred())
3016 return;
3017
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003018 if (TreeBuilder_CheckExact(self->target))
3019 /* shortcut */
3020 /* the standard tree builder doesn't look at the end tag */
3021 res = treebuilder_handle_end(
3022 (TreeBuilderObject*) self->target, Py_None
3023 );
3024 else if (self->handle_end) {
3025 tag = makeuniversal(self, tag_in);
3026 if (tag) {
3027 res = PyObject_CallFunction(self->handle_end, "O", tag);
3028 Py_DECREF(tag);
3029 }
3030 }
3031
3032 Py_XDECREF(res);
3033}
3034
3035static void
3036expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3037 const XML_Char *uri)
3038{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003039 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3040 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003041
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003042 if (PyErr_Occurred())
3043 return;
3044
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003045 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003046 return;
3047
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003048 if (!uri)
3049 uri = "";
3050 if (!prefix)
3051 prefix = "";
3052
3053 parcel = Py_BuildValue("ss", prefix, uri);
3054 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003055 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003056 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3057 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003058}
3059
3060static void
3061expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3062{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003063 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3064
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003065 if (PyErr_Occurred())
3066 return;
3067
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003068 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003069 return;
3070
3071 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072}
3073
3074static void
3075expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3076{
3077 PyObject* comment;
3078 PyObject* res;
3079
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003080 if (PyErr_Occurred())
3081 return;
3082
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003083 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003084 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003085 if (comment) {
3086 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3087 Py_XDECREF(res);
3088 Py_DECREF(comment);
3089 }
3090 }
3091}
3092
Eli Bendersky45839902013-01-13 05:14:47 -08003093static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003094expat_start_doctype_handler(XMLParserObject *self,
3095 const XML_Char *doctype_name,
3096 const XML_Char *sysid,
3097 const XML_Char *pubid,
3098 int has_internal_subset)
3099{
3100 PyObject *self_pyobj = (PyObject *)self;
3101 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3102 PyObject *parser_doctype = NULL;
3103 PyObject *res = NULL;
3104
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003105 if (PyErr_Occurred())
3106 return;
3107
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003108 doctype_name_obj = makeuniversal(self, doctype_name);
3109 if (!doctype_name_obj)
3110 return;
3111
3112 if (sysid) {
3113 sysid_obj = makeuniversal(self, sysid);
3114 if (!sysid_obj) {
3115 Py_DECREF(doctype_name_obj);
3116 return;
3117 }
3118 } else {
3119 Py_INCREF(Py_None);
3120 sysid_obj = Py_None;
3121 }
3122
3123 if (pubid) {
3124 pubid_obj = makeuniversal(self, pubid);
3125 if (!pubid_obj) {
3126 Py_DECREF(doctype_name_obj);
3127 Py_DECREF(sysid_obj);
3128 return;
3129 }
3130 } else {
3131 Py_INCREF(Py_None);
3132 pubid_obj = Py_None;
3133 }
3134
3135 /* If the target has a handler for doctype, call it. */
3136 if (self->handle_doctype) {
3137 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3138 doctype_name_obj, pubid_obj, sysid_obj);
3139 Py_CLEAR(res);
3140 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003141 else {
3142 /* Now see if the parser itself has a doctype method. If yes and it's
3143 * a custom method, call it but warn about deprecation. If it's only
3144 * the vanilla XMLParser method, do nothing.
3145 */
3146 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3147 if (parser_doctype &&
3148 !(PyCFunction_Check(parser_doctype) &&
3149 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3150 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003151 (PyCFunction) _elementtree_XMLParser_doctype)) {
3152 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3153 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003154 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003155 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003156 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003157 res = PyObject_CallFunction(parser_doctype, "OOO",
3158 doctype_name_obj, pubid_obj, sysid_obj);
3159 Py_CLEAR(res);
3160 }
3161 }
3162
3163clear:
3164 Py_XDECREF(parser_doctype);
3165 Py_DECREF(doctype_name_obj);
3166 Py_DECREF(pubid_obj);
3167 Py_DECREF(sysid_obj);
3168}
3169
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003170static void
3171expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3172 const XML_Char* data_in)
3173{
3174 PyObject* target;
3175 PyObject* data;
3176 PyObject* res;
3177
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003178 if (PyErr_Occurred())
3179 return;
3180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003181 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003182 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3183 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184 if (target && data) {
3185 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3186 Py_XDECREF(res);
3187 Py_DECREF(data);
3188 Py_DECREF(target);
3189 } else {
3190 Py_XDECREF(data);
3191 Py_XDECREF(target);
3192 }
3193 }
3194}
3195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003197
Eli Bendersky52467b12012-06-01 07:13:08 +03003198static PyObject *
3199xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200{
Eli Bendersky52467b12012-06-01 07:13:08 +03003201 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3202 if (self) {
3203 self->parser = NULL;
3204 self->target = self->entity = self->names = NULL;
3205 self->handle_start = self->handle_data = self->handle_end = NULL;
3206 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003207 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003208 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003209 return (PyObject *)self;
3210}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003211
Serhiy Storchakacb985562015-05-04 15:32:48 +03003212/*[clinic input]
3213_elementtree.XMLParser.__init__
3214
3215 html: object = NULL
3216 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003217 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003218
3219[clinic start generated code]*/
3220
Eli Bendersky52467b12012-06-01 07:13:08 +03003221static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003222_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3223 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003224/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003225{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003226 self->entity = PyDict_New();
3227 if (!self->entity)
3228 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229
Serhiy Storchakacb985562015-05-04 15:32:48 +03003230 self->names = PyDict_New();
3231 if (!self->names) {
3232 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003233 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003235
Serhiy Storchakacb985562015-05-04 15:32:48 +03003236 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3237 if (!self->parser) {
3238 Py_CLEAR(self->entity);
3239 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003241 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242 }
3243
Eli Bendersky52467b12012-06-01 07:13:08 +03003244 if (target) {
3245 Py_INCREF(target);
3246 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003247 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003249 Py_CLEAR(self->entity);
3250 Py_CLEAR(self->names);
3251 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003254 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003255 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256
Serhiy Storchakacb985562015-05-04 15:32:48 +03003257 self->handle_start = PyObject_GetAttrString(target, "start");
3258 self->handle_data = PyObject_GetAttrString(target, "data");
3259 self->handle_end = PyObject_GetAttrString(target, "end");
3260 self->handle_comment = PyObject_GetAttrString(target, "comment");
3261 self->handle_pi = PyObject_GetAttrString(target, "pi");
3262 self->handle_close = PyObject_GetAttrString(target, "close");
3263 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003264
3265 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003266
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003268 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 (XML_StartElementHandler) expat_start_handler,
3272 (XML_EndElementHandler) expat_end_handler
3273 );
3274 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003275 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 (XML_DefaultHandler) expat_default_handler
3277 );
3278 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 (XML_CharacterDataHandler) expat_data_handler
3281 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003282 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003284 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285 (XML_CommentHandler) expat_comment_handler
3286 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003287 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003289 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 (XML_ProcessingInstructionHandler) expat_pi_handler
3291 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003292 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003294 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3295 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003298 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300
Eli Bendersky52467b12012-06-01 07:13:08 +03003301 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302}
3303
Eli Bendersky52467b12012-06-01 07:13:08 +03003304static int
3305xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3306{
3307 Py_VISIT(self->handle_close);
3308 Py_VISIT(self->handle_pi);
3309 Py_VISIT(self->handle_comment);
3310 Py_VISIT(self->handle_end);
3311 Py_VISIT(self->handle_data);
3312 Py_VISIT(self->handle_start);
3313
3314 Py_VISIT(self->target);
3315 Py_VISIT(self->entity);
3316 Py_VISIT(self->names);
3317
3318 return 0;
3319}
3320
3321static int
3322xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323{
3324 EXPAT(ParserFree)(self->parser);
3325
Antoine Pitrouc1948842012-10-01 23:40:37 +02003326 Py_CLEAR(self->handle_close);
3327 Py_CLEAR(self->handle_pi);
3328 Py_CLEAR(self->handle_comment);
3329 Py_CLEAR(self->handle_end);
3330 Py_CLEAR(self->handle_data);
3331 Py_CLEAR(self->handle_start);
3332 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333
Antoine Pitrouc1948842012-10-01 23:40:37 +02003334 Py_CLEAR(self->target);
3335 Py_CLEAR(self->entity);
3336 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337
Eli Bendersky52467b12012-06-01 07:13:08 +03003338 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003339}
3340
Eli Bendersky52467b12012-06-01 07:13:08 +03003341static void
3342xmlparser_dealloc(XMLParserObject* self)
3343{
3344 PyObject_GC_UnTrack(self);
3345 xmlparser_gc_clear(self);
3346 Py_TYPE(self)->tp_free((PyObject *)self);
3347}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348
3349LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003350expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351{
3352 int ok;
3353
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003354 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003355 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3356
3357 if (PyErr_Occurred())
3358 return NULL;
3359
3360 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003361 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003362 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003363 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003364 EXPAT(GetErrorColumnNumber)(self->parser),
3365 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366 );
3367 return NULL;
3368 }
3369
3370 Py_RETURN_NONE;
3371}
3372
Serhiy Storchakacb985562015-05-04 15:32:48 +03003373/*[clinic input]
3374_elementtree.XMLParser.close
3375
3376[clinic start generated code]*/
3377
3378static PyObject *
3379_elementtree_XMLParser_close_impl(XMLParserObject *self)
3380/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003381{
3382 /* end feeding data to parser */
3383
3384 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003386 if (!res)
3387 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003388
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003389 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003390 Py_DECREF(res);
3391 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003392 }
3393 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003394 Py_DECREF(res);
3395 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003396 }
3397 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003398 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003399 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400}
3401
Serhiy Storchakacb985562015-05-04 15:32:48 +03003402/*[clinic input]
3403_elementtree.XMLParser.feed
3404
3405 data: object
3406 /
3407
3408[clinic start generated code]*/
3409
3410static PyObject *
3411_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3412/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003413{
3414 /* feed data to parser */
3415
Serhiy Storchakacb985562015-05-04 15:32:48 +03003416 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003417 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003418 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3419 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003420 return NULL;
3421 if (data_len > INT_MAX) {
3422 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3423 return NULL;
3424 }
3425 /* Explicitly set UTF-8 encoding. Return code ignored. */
3426 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003427 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003428 }
3429 else {
3430 Py_buffer view;
3431 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003432 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003433 return NULL;
3434 if (view.len > INT_MAX) {
3435 PyBuffer_Release(&view);
3436 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3437 return NULL;
3438 }
3439 res = expat_parse(self, view.buf, (int)view.len, 0);
3440 PyBuffer_Release(&view);
3441 return res;
3442 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003443}
3444
Serhiy Storchakacb985562015-05-04 15:32:48 +03003445/*[clinic input]
3446_elementtree.XMLParser._parse_whole
3447
3448 file: object
3449 /
3450
3451[clinic start generated code]*/
3452
3453static PyObject *
3454_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3455/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456{
Eli Benderskya3699232013-05-19 18:47:23 -07003457 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003458 PyObject* reader;
3459 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003460 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461 PyObject* res;
3462
Serhiy Storchakacb985562015-05-04 15:32:48 +03003463 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464 if (!reader)
3465 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003466
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003467 /* read from open file object */
3468 for (;;) {
3469
3470 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3471
3472 if (!buffer) {
3473 /* read failed (e.g. due to KeyboardInterrupt) */
3474 Py_DECREF(reader);
3475 return NULL;
3476 }
3477
Eli Benderskyf996e772012-03-16 05:53:30 +02003478 if (PyUnicode_CheckExact(buffer)) {
3479 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003480 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003481 Py_DECREF(buffer);
3482 break;
3483 }
3484 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003485 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003486 if (!temp) {
3487 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003488 Py_DECREF(reader);
3489 return NULL;
3490 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003491 buffer = temp;
3492 }
3493 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494 Py_DECREF(buffer);
3495 break;
3496 }
3497
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003498 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3499 Py_DECREF(buffer);
3500 Py_DECREF(reader);
3501 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3502 return NULL;
3503 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003504 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003505 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003506 );
3507
3508 Py_DECREF(buffer);
3509
3510 if (!res) {
3511 Py_DECREF(reader);
3512 return NULL;
3513 }
3514 Py_DECREF(res);
3515
3516 }
3517
3518 Py_DECREF(reader);
3519
3520 res = expat_parse(self, "", 0, 1);
3521
3522 if (res && TreeBuilder_CheckExact(self->target)) {
3523 Py_DECREF(res);
3524 return treebuilder_done((TreeBuilderObject*) self->target);
3525 }
3526
3527 return res;
3528}
3529
Serhiy Storchakacb985562015-05-04 15:32:48 +03003530/*[clinic input]
3531_elementtree.XMLParser.doctype
3532
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003533 name: object
3534 pubid: object
3535 system: object
3536 /
3537
Serhiy Storchakacb985562015-05-04 15:32:48 +03003538[clinic start generated code]*/
3539
3540static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003541_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3542 PyObject *pubid, PyObject *system)
3543/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003544{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003545 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3546 "This method of XMLParser is deprecated. Define"
3547 " doctype() method on the TreeBuilder target.",
3548 1) < 0) {
3549 return NULL;
3550 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003551 Py_RETURN_NONE;
3552}
3553
Serhiy Storchakacb985562015-05-04 15:32:48 +03003554/*[clinic input]
3555_elementtree.XMLParser._setevents
3556
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003557 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003558 events_to_report: object = None
3559 /
3560
3561[clinic start generated code]*/
3562
3563static PyObject *
3564_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3565 PyObject *events_queue,
3566 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003567/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003568{
3569 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003570 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003571 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003572 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003573
3574 if (!TreeBuilder_CheckExact(self->target)) {
3575 PyErr_SetString(
3576 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003577 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578 "targets"
3579 );
3580 return NULL;
3581 }
3582
3583 target = (TreeBuilderObject*) self->target;
3584
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003585 events_append = PyObject_GetAttrString(events_queue, "append");
3586 if (events_append == NULL)
3587 return NULL;
Serhiy Storchakaea8c4312015-12-24 11:53:16 +02003588 Py_SETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589
3590 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003591 Py_CLEAR(target->start_event_obj);
3592 Py_CLEAR(target->end_event_obj);
3593 Py_CLEAR(target->start_ns_event_obj);
3594 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003595
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003596 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003597 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003598 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599 Py_RETURN_NONE;
3600 }
3601
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003602 if (!(events_seq = PySequence_Fast(events_to_report,
3603 "events must be a sequence"))) {
3604 return NULL;
3605 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003606
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003607 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003608 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3609 char *event_name = NULL;
3610 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003611 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003612 } else if (PyBytes_Check(event_name_obj)) {
3613 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003614 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003615 if (event_name == NULL) {
3616 Py_DECREF(events_seq);
3617 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3618 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003619 }
3620
3621 Py_INCREF(event_name_obj);
3622 if (strcmp(event_name, "start") == 0) {
3623 Py_SETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003624 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003625 Py_SETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003626 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003627 Py_SETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003628 EXPAT(SetNamespaceDeclHandler)(
3629 self->parser,
3630 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3631 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3632 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003633 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003634 Py_SETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635 EXPAT(SetNamespaceDeclHandler)(
3636 self->parser,
3637 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3638 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3639 );
3640 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003641 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003642 Py_DECREF(events_seq);
3643 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 return NULL;
3645 }
3646 }
3647
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003648 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003649 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003650}
3651
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003652static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003653xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003654{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003655 if (PyUnicode_Check(nameobj)) {
3656 PyObject* res;
3657 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3658 res = self->entity;
3659 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3660 res = self->target;
3661 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3662 return PyUnicode_FromFormat(
3663 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003665 }
3666 else
3667 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668
Alexander Belopolskye239d232010-12-08 23:31:48 +00003669 Py_INCREF(res);
3670 return res;
3671 }
3672 generic:
3673 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003674}
3675
Serhiy Storchakacb985562015-05-04 15:32:48 +03003676#include "clinic/_elementtree.c.h"
3677
3678static PyMethodDef element_methods[] = {
3679
3680 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3681
3682 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3683 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3684
3685 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3686 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3687 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3688
3689 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3690 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3691 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3692 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3693
3694 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3695 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3696 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3697
3698 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3699 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3700
3701 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3702 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3703
3704 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3705
3706 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3707 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3708 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3709 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3710 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3711
3712 {NULL, NULL}
3713};
3714
3715static PyMappingMethods element_as_mapping = {
3716 (lenfunc) element_length,
3717 (binaryfunc) element_subscr,
3718 (objobjargproc) element_ass_subscr,
3719};
3720
Serhiy Storchakadde08152015-11-25 15:28:13 +02003721static PyGetSetDef element_getsetlist[] = {
3722 {"tag",
3723 (getter)element_tag_getter,
3724 (setter)element_tag_setter,
3725 "A string identifying what kind of data this element represents"},
3726 {"text",
3727 (getter)element_text_getter,
3728 (setter)element_text_setter,
3729 "A string of text directly after the start tag, or None"},
3730 {"tail",
3731 (getter)element_tail_getter,
3732 (setter)element_tail_setter,
3733 "A string of text directly after the end tag, or None"},
3734 {"attrib",
3735 (getter)element_attrib_getter,
3736 (setter)element_attrib_setter,
3737 "A dictionary containing the element's attributes"},
3738 {NULL},
3739};
3740
Serhiy Storchakacb985562015-05-04 15:32:48 +03003741static PyTypeObject Element_Type = {
3742 PyVarObject_HEAD_INIT(NULL, 0)
3743 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3744 /* methods */
3745 (destructor)element_dealloc, /* tp_dealloc */
3746 0, /* tp_print */
3747 0, /* tp_getattr */
3748 0, /* tp_setattr */
3749 0, /* tp_reserved */
3750 (reprfunc)element_repr, /* tp_repr */
3751 0, /* tp_as_number */
3752 &element_as_sequence, /* tp_as_sequence */
3753 &element_as_mapping, /* tp_as_mapping */
3754 0, /* tp_hash */
3755 0, /* tp_call */
3756 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003757 PyObject_GenericGetAttr, /* tp_getattro */
3758 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003759 0, /* tp_as_buffer */
3760 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3761 /* tp_flags */
3762 0, /* tp_doc */
3763 (traverseproc)element_gc_traverse, /* tp_traverse */
3764 (inquiry)element_gc_clear, /* tp_clear */
3765 0, /* tp_richcompare */
3766 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3767 0, /* tp_iter */
3768 0, /* tp_iternext */
3769 element_methods, /* tp_methods */
3770 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003771 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003772 0, /* tp_base */
3773 0, /* tp_dict */
3774 0, /* tp_descr_get */
3775 0, /* tp_descr_set */
3776 0, /* tp_dictoffset */
3777 (initproc)element_init, /* tp_init */
3778 PyType_GenericAlloc, /* tp_alloc */
3779 element_new, /* tp_new */
3780 0, /* tp_free */
3781};
3782
3783static PyMethodDef treebuilder_methods[] = {
3784 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3785 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3786 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3787 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3788 {NULL, NULL}
3789};
3790
3791static PyTypeObject TreeBuilder_Type = {
3792 PyVarObject_HEAD_INIT(NULL, 0)
3793 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3794 /* methods */
3795 (destructor)treebuilder_dealloc, /* tp_dealloc */
3796 0, /* tp_print */
3797 0, /* tp_getattr */
3798 0, /* tp_setattr */
3799 0, /* tp_reserved */
3800 0, /* tp_repr */
3801 0, /* tp_as_number */
3802 0, /* tp_as_sequence */
3803 0, /* tp_as_mapping */
3804 0, /* tp_hash */
3805 0, /* tp_call */
3806 0, /* tp_str */
3807 0, /* tp_getattro */
3808 0, /* tp_setattro */
3809 0, /* tp_as_buffer */
3810 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3811 /* tp_flags */
3812 0, /* tp_doc */
3813 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3814 (inquiry)treebuilder_gc_clear, /* tp_clear */
3815 0, /* tp_richcompare */
3816 0, /* tp_weaklistoffset */
3817 0, /* tp_iter */
3818 0, /* tp_iternext */
3819 treebuilder_methods, /* tp_methods */
3820 0, /* tp_members */
3821 0, /* tp_getset */
3822 0, /* tp_base */
3823 0, /* tp_dict */
3824 0, /* tp_descr_get */
3825 0, /* tp_descr_set */
3826 0, /* tp_dictoffset */
3827 _elementtree_TreeBuilder___init__, /* tp_init */
3828 PyType_GenericAlloc, /* tp_alloc */
3829 treebuilder_new, /* tp_new */
3830 0, /* tp_free */
3831};
3832
3833static PyMethodDef xmlparser_methods[] = {
3834 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3835 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3836 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3837 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3838 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3839 {NULL, NULL}
3840};
3841
Neal Norwitz227b5332006-03-22 09:28:35 +00003842static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003843 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003844 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003845 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003846 (destructor)xmlparser_dealloc, /* tp_dealloc */
3847 0, /* tp_print */
3848 0, /* tp_getattr */
3849 0, /* tp_setattr */
3850 0, /* tp_reserved */
3851 0, /* tp_repr */
3852 0, /* tp_as_number */
3853 0, /* tp_as_sequence */
3854 0, /* tp_as_mapping */
3855 0, /* tp_hash */
3856 0, /* tp_call */
3857 0, /* tp_str */
3858 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3859 0, /* tp_setattro */
3860 0, /* tp_as_buffer */
3861 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3862 /* tp_flags */
3863 0, /* tp_doc */
3864 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3865 (inquiry)xmlparser_gc_clear, /* tp_clear */
3866 0, /* tp_richcompare */
3867 0, /* tp_weaklistoffset */
3868 0, /* tp_iter */
3869 0, /* tp_iternext */
3870 xmlparser_methods, /* tp_methods */
3871 0, /* tp_members */
3872 0, /* tp_getset */
3873 0, /* tp_base */
3874 0, /* tp_dict */
3875 0, /* tp_descr_get */
3876 0, /* tp_descr_set */
3877 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003878 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003879 PyType_GenericAlloc, /* tp_alloc */
3880 xmlparser_new, /* tp_new */
3881 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003882};
3883
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003884/* ==================================================================== */
3885/* python module interface */
3886
3887static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003888 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003889 {NULL, NULL}
3890};
3891
Martin v. Löwis1a214512008-06-11 05:26:20 +00003892
Eli Bendersky532d03e2013-08-10 08:00:39 -07003893static struct PyModuleDef elementtreemodule = {
3894 PyModuleDef_HEAD_INIT,
3895 "_elementtree",
3896 NULL,
3897 sizeof(elementtreestate),
3898 _functions,
3899 NULL,
3900 elementtree_traverse,
3901 elementtree_clear,
3902 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003903};
3904
Neal Norwitzf6657e62006-12-28 04:47:50 +00003905PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003906PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003907{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003908 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003909 elementtreestate *st;
3910
3911 m = PyState_FindModule(&elementtreemodule);
3912 if (m) {
3913 Py_INCREF(m);
3914 return m;
3915 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003916
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003917 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003918 if (PyType_Ready(&ElementIter_Type) < 0)
3919 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003920 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003921 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003922 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003923 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003924 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003925 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003926
Eli Bendersky532d03e2013-08-10 08:00:39 -07003927 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003928 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003929 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003930 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003931
Eli Bendersky828efde2012-04-05 05:40:58 +03003932 if (!(temp = PyImport_ImportModule("copy")))
3933 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003934 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003935 Py_XDECREF(temp);
3936
Eli Bendersky532d03e2013-08-10 08:00:39 -07003937 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003938 return NULL;
3939
Eli Bendersky20d41742012-06-01 09:48:37 +03003940 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003941 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3942 if (expat_capi) {
3943 /* check that it's usable */
3944 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003945 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003946 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3947 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003948 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003949 PyErr_SetString(PyExc_ImportError,
3950 "pyexpat version is incompatible");
3951 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003952 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003953 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003954 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003955 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003956
Eli Bendersky532d03e2013-08-10 08:00:39 -07003957 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003958 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003959 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003960 Py_INCREF(st->parseerror_obj);
3961 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003962
Eli Bendersky092af1f2012-03-04 07:14:03 +02003963 Py_INCREF((PyObject *)&Element_Type);
3964 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3965
Eli Bendersky58d548d2012-05-29 15:45:16 +03003966 Py_INCREF((PyObject *)&TreeBuilder_Type);
3967 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3968
Eli Bendersky52467b12012-06-01 07:13:08 +03003969 Py_INCREF((PyObject *)&XMLParser_Type);
3970 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003971
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003972 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003973}