blob: f16d48f829f1cbe744efca9e27bf582c9d5b91ba [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of children an element can hold without an extra memory
   allocation. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  With the condition below switched to 1, a
   running byte total is kept in 'memory' and printed on every
   ALLOC/RELEASE; as configured here both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) introduces a file-local function returning
   'type'; under MSVC it additionally requests __inline and __fastcall. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
55
/* Helpers for the 'join' flag kept in the lowest bit of the text and tail
   object pointers.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition.

   JOIN_GET(p)        - the flag bit stored in p (0 or 1)
   JOIN_SET(p, flag)  - p with its old flag bit stripped and 'flag' or-ed in
   JOIN_OBJ(p)        - p as a PyObject*, with the flag bit masked off */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
/* Per-module state of the given module object, which is assumed to be
 * _elementtree.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Per-module state of the _elementtree instance that PyState_FindModule()
 * reports for the currently running sub-interpreter.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
/* True only when op's type is exactly Element_Type (subclasses excluded). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
399 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200401 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200402 Py_DECREF(tmp);
403
404 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 Py_DECREF(JOIN_OBJ(tmp));
408
409 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200411 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_DECREF(JOIN_OBJ(tmp));
413
414 return 0;
415}
416
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200420 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000421 PyObject* *children;
422
423 /* make sure self->children can hold the given number of extra
424 elements. set an exception and return -1 if allocation failed */
425
Victor Stinner5f0af232013-07-11 23:01:36 +0200426 if (!self->extra) {
427 if (create_extra(self, NULL) < 0)
428 return -1;
429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200431 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432
433 if (size > self->extra->allocated) {
434 /* use Python 2.4's list growth strategy */
435 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000436 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100437 * which needs at least 4 bytes.
438 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000439 * be safe.
440 */
441 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
443 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100446 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 * false alarm always assume at least one child to be safe.
448 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 children = PyObject_Realloc(self->extra->children,
450 size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 } else {
454 children = PyObject_Malloc(size * sizeof(PyObject*));
455 if (!children)
456 goto nomemory;
457 /* copy existing children from static area to malloc buffer */
458 memcpy(children, self->extra->children,
459 self->extra->length * sizeof(PyObject*));
460 }
461 self->extra->children = children;
462 self->extra->allocated = size;
463 }
464
465 return 0;
466
467 nomemory:
468 PyErr_NoMemory();
469 return -1;
470}
471
472LOCAL(int)
473element_add_subelement(ElementObject* self, PyObject* element)
474{
475 /* add a child element to a parent */
476
477 if (element_resize(self, 1) < 0)
478 return -1;
479
480 Py_INCREF(element);
481 self->extra->children[self->extra->length] = element;
482
483 self->extra->length++;
484
485 return 0;
486}
487
488LOCAL(PyObject*)
489element_get_attrib(ElementObject* self)
490{
491 /* return borrowed reference to attrib dictionary */
492 /* note: this function assumes that the extra section exists */
493
494 PyObject* res = self->extra->attrib;
495
496 if (res == Py_None) {
497 /* create missing dictionary */
498 res = PyDict_New();
499 if (!res)
500 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200501 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000502 self->extra->attrib = res;
503 }
504
505 return res;
506}
507
508LOCAL(PyObject*)
509element_get_text(ElementObject* self)
510{
511 /* return borrowed reference to text attribute */
512
513 PyObject* res = self->text;
514
515 if (JOIN_GET(res)) {
516 res = JOIN_OBJ(res);
517 if (PyList_CheckExact(res)) {
518 res = list_join(res);
519 if (!res)
520 return NULL;
521 self->text = res;
522 }
523 }
524
525 return res;
526}
527
528LOCAL(PyObject*)
529element_get_tail(ElementObject* self)
530{
531 /* return borrowed reference to text attribute */
532
533 PyObject* res = self->tail;
534
535 if (JOIN_GET(res)) {
536 res = JOIN_OBJ(res);
537 if (PyList_CheckExact(res)) {
538 res = list_join(res);
539 if (!res)
540 return NULL;
541 self->tail = res;
542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
613static int
614element_gc_clear(ElementObject *self)
615{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700617 _clear_joined_ptr(&self->text);
618 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619
620 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 return 0;
625}
626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627static void
628element_dealloc(ElementObject* self)
629{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300631
632 if (self->weakreflist != NULL)
633 PyObject_ClearWeakRefs((PyObject *) self);
634
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 /* element_gc_clear clears all references and deallocates extra
636 */
637 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638
639 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200640 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000641}
642
643/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
Serhiy Storchakacb985562015-05-04 15:32:48 +0300645/*[clinic input]
646_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647
Serhiy Storchakacb985562015-05-04 15:32:48 +0300648 subelement: object(subclass_of='&Element_Type')
649 /
650
651[clinic start generated code]*/
652
653static PyObject *
654_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
655/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
656{
657 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658 return NULL;
659
660 Py_RETURN_NONE;
661}
662
Serhiy Storchakacb985562015-05-04 15:32:48 +0300663/*[clinic input]
664_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666[clinic start generated code]*/
667
668static PyObject *
669_elementtree_Element_clear_impl(ElementObject *self)
670/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
671{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300672 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
674 Py_INCREF(Py_None);
675 Py_DECREF(JOIN_OBJ(self->text));
676 self->text = Py_None;
677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->tail));
680 self->tail = Py_None;
681
682 Py_RETURN_NONE;
683}
684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685/*[clinic input]
686_elementtree.Element.__copy__
687
688[clinic start generated code]*/
689
690static PyObject *
691_elementtree_Element___copy___impl(ElementObject *self)
692/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200694 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695 ElementObject* element;
696
Eli Bendersky092af1f2012-03-04 07:14:03 +0200697 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800698 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699 if (!element)
700 return NULL;
701
702 Py_DECREF(JOIN_OBJ(element->text));
703 element->text = self->text;
704 Py_INCREF(JOIN_OBJ(element->text));
705
706 Py_DECREF(JOIN_OBJ(element->tail));
707 element->tail = self->tail;
708 Py_INCREF(JOIN_OBJ(element->tail));
709
710 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000711 if (element_resize(element, self->extra->length) < 0) {
712 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 for (i = 0; i < self->extra->length; i++) {
717 Py_INCREF(self->extra->children[i]);
718 element->extra->children[i] = self->extra->children[i];
719 }
720
721 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722 }
723
724 return (PyObject*) element;
725}
726
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200727/* Helper for a deep copy. */
728LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
729
Serhiy Storchakacb985562015-05-04 15:32:48 +0300730/*[clinic input]
731_elementtree.Element.__deepcopy__
732
733 memo: object
734 /
735
736[clinic start generated code]*/
737
738static PyObject *
739_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
740/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 ElementObject* element;
744 PyObject* tag;
745 PyObject* attrib;
746 PyObject* text;
747 PyObject* tail;
748 PyObject* id;
749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 tag = deepcopy(self->tag, memo);
751 if (!tag)
752 return NULL;
753
754 if (self->extra) {
755 attrib = deepcopy(self->extra->attrib, memo);
756 if (!attrib) {
757 Py_DECREF(tag);
758 return NULL;
759 }
760 } else {
761 Py_INCREF(Py_None);
762 attrib = Py_None;
763 }
764
Eli Bendersky092af1f2012-03-04 07:14:03 +0200765 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766
767 Py_DECREF(tag);
768 Py_DECREF(attrib);
769
770 if (!element)
771 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 text = deepcopy(JOIN_OBJ(self->text), memo);
774 if (!text)
775 goto error;
776 Py_DECREF(element->text);
777 element->text = JOIN_SET(text, JOIN_GET(self->text));
778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
782 Py_DECREF(element->tail);
783 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
784
785 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000786 if (element_resize(element, self->extra->length) < 0)
787 goto error;
788
789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* child = deepcopy(self->extra->children[i], memo);
791 if (!child) {
792 element->extra->length = i;
793 goto error;
794 }
795 element->extra->children[i] = child;
796 }
797
798 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200802 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200820LOCAL(PyObject *)
821deepcopy(PyObject *object, PyObject *memo)
822{
823 /* do a deep copy of the given object */
824 PyObject *args;
825 PyObject *result;
826 elementtreestate *st;
827
828 /* Fast paths */
829 if (object == Py_None || PyUnicode_CheckExact(object)) {
830 Py_INCREF(object);
831 return object;
832 }
833
834 if (Py_REFCNT(object) == 1) {
835 if (PyDict_CheckExact(object)) {
836 PyObject *key, *value;
837 Py_ssize_t pos = 0;
838 int simple = 1;
839 while (PyDict_Next(object, &pos, &key, &value)) {
840 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
841 simple = 0;
842 break;
843 }
844 }
845 if (simple)
846 return PyDict_Copy(object);
847 /* Fall through to general case */
848 }
849 else if (Element_CheckExact(object)) {
850 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
851 }
852 }
853
854 /* General case */
855 st = ET_STATE_GLOBAL;
856 if (!st->deepcopy_obj) {
857 PyErr_SetString(PyExc_RuntimeError,
858 "deepcopy helper not found");
859 return NULL;
860 }
861
862 args = PyTuple_Pack(2, object, memo);
863 if (!args)
864 return NULL;
865 result = PyObject_CallObject(st->deepcopy_obj, args);
866 Py_DECREF(args);
867 return result;
868}
869
870
Serhiy Storchakacb985562015-05-04 15:32:48 +0300871/*[clinic input]
872_elementtree.Element.__sizeof__ -> Py_ssize_t
873
874[clinic start generated code]*/
875
876static Py_ssize_t
877_elementtree_Element___sizeof___impl(ElementObject *self)
878/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200880 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200881 if (self->extra) {
882 result += sizeof(ElementObjectExtra);
883 if (self->extra->children != self->extra->_children)
884 result += sizeof(PyObject*) * self->extra->allocated;
885 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300886 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200887}
888
Eli Bendersky698bdb22013-01-10 06:01:06 -0800889/* dict keys for getstate/setstate. */
890#define PICKLED_TAG "tag"
891#define PICKLED_CHILDREN "_children"
892#define PICKLED_ATTRIB "attrib"
893#define PICKLED_TAIL "tail"
894#define PICKLED_TEXT "text"
895
896/* __getstate__ returns a fabricated instance dict as in the pure-Python
897 * Element implementation, for interoperability/interchangeability. This
898 * makes the pure-Python implementation details an API, but (a) there aren't
899 * any unnecessary structures there; and (b) it buys compatibility with 3.2
900 * pickles. See issue #16076.
901 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300902/*[clinic input]
903_elementtree.Element.__getstate__
904
905[clinic start generated code]*/
906
Eli Bendersky698bdb22013-01-10 06:01:06 -0800907static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300908_elementtree_Element___getstate___impl(ElementObject *self)
909/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200911 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 PyObject *instancedict = NULL, *children;
913
914 /* Build a list of children. */
915 children = PyList_New(self->extra ? self->extra->length : 0);
916 if (!children)
917 return NULL;
918 for (i = 0; i < PyList_GET_SIZE(children); i++) {
919 PyObject *child = self->extra->children[i];
920 Py_INCREF(child);
921 PyList_SET_ITEM(children, i, child);
922 }
923
924 /* Construct the state object. */
925 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
926 if (noattrib)
927 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
928 PICKLED_TAG, self->tag,
929 PICKLED_CHILDREN, children,
930 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700931 PICKLED_TEXT, JOIN_OBJ(self->text),
932 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800933 else
934 instancedict = Py_BuildValue("{sOsOsOsOsO}",
935 PICKLED_TAG, self->tag,
936 PICKLED_CHILDREN, children,
937 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700938 PICKLED_TEXT, JOIN_OBJ(self->text),
939 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800940 if (instancedict) {
941 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800943 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944 else {
945 for (i = 0; i < PyList_GET_SIZE(children); i++)
946 Py_DECREF(PyList_GET_ITEM(children, i));
947 Py_DECREF(children);
948
949 return NULL;
950 }
951}
952
953static PyObject *
954element_setstate_from_attributes(ElementObject *self,
955 PyObject *tag,
956 PyObject *attrib,
957 PyObject *text,
958 PyObject *tail,
959 PyObject *children)
960{
961 Py_ssize_t i, nchildren;
962
963 if (!tag) {
964 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
965 return NULL;
966 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200968 Py_INCREF(tag);
969 Py_SETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800970
Eli Benderskydd3661e2013-09-13 06:24:25 -0700971 _clear_joined_ptr(&self->text);
972 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
973 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800974
Eli Benderskydd3661e2013-09-13 06:24:25 -0700975 _clear_joined_ptr(&self->tail);
976 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
977 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978
979 /* Handle ATTRIB and CHILDREN. */
980 if (!children && !attrib)
981 Py_RETURN_NONE;
982
983 /* Compute 'nchildren'. */
984 if (children) {
985 if (!PyList_Check(children)) {
986 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
987 return NULL;
988 }
989 nchildren = PyList_Size(children);
990 }
991 else {
992 nchildren = 0;
993 }
994
995 /* Allocate 'extra'. */
996 if (element_resize(self, nchildren)) {
997 return NULL;
998 }
999 assert(self->extra && self->extra->allocated >= nchildren);
1000
1001 /* Copy children */
1002 for (i = 0; i < nchildren; i++) {
1003 self->extra->children[i] = PyList_GET_ITEM(children, i);
1004 Py_INCREF(self->extra->children[i]);
1005 }
1006
1007 self->extra->length = nchildren;
1008 self->extra->allocated = nchildren;
1009
1010 /* Stash attrib. */
1011 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -08001012 Py_INCREF(attrib);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001013 Py_SETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001014 }
1015
1016 Py_RETURN_NONE;
1017}
1018
1019/* __setstate__ for Element instance from the Python implementation.
1020 * 'state' should be the instance dict.
1021 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001022
Eli Bendersky698bdb22013-01-10 06:01:06 -08001023static PyObject *
1024element_setstate_from_Python(ElementObject *self, PyObject *state)
1025{
1026 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1027 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1028 PyObject *args;
1029 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001030 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031
Eli Bendersky698bdb22013-01-10 06:01:06 -08001032 tag = attrib = text = tail = children = NULL;
1033 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001034 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001035 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001036
1037 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1038 &attrib, &text, &tail, &children))
1039 retval = element_setstate_from_attributes(self, tag, attrib, text,
1040 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001041 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001042 retval = NULL;
1043
1044 Py_DECREF(args);
1045 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001046}
1047
Serhiy Storchakacb985562015-05-04 15:32:48 +03001048/*[clinic input]
1049_elementtree.Element.__setstate__
1050
1051 state: object
1052 /
1053
1054[clinic start generated code]*/
1055
Eli Bendersky698bdb22013-01-10 06:01:06 -08001056static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001057_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1058/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001059{
1060 if (!PyDict_CheckExact(state)) {
1061 PyErr_Format(PyExc_TypeError,
1062 "Don't know how to unpickle \"%.200R\" as an Element",
1063 state);
1064 return NULL;
1065 }
1066 else
1067 return element_setstate_from_Python(self, state);
1068}
1069
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001070LOCAL(int)
1071checkpath(PyObject* tag)
1072{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001073 Py_ssize_t i;
1074 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001075
1076 /* check if a tag contains an xpath character */
1077
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001078#define PATHCHAR(ch) \
1079 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001080
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001081 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1083 void *data = PyUnicode_DATA(tag);
1084 unsigned int kind = PyUnicode_KIND(tag);
1085 for (i = 0; i < len; i++) {
1086 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1087 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001088 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001091 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 return 1;
1093 }
1094 return 0;
1095 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001096 if (PyBytes_Check(tag)) {
1097 char *p = PyBytes_AS_STRING(tag);
1098 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001099 if (p[i] == '{')
1100 check = 0;
1101 else if (p[i] == '}')
1102 check = 1;
1103 else if (check && PATHCHAR(p[i]))
1104 return 1;
1105 }
1106 return 0;
1107 }
1108
1109 return 1; /* unknown type; might be path expression */
1110}
1111
Serhiy Storchakacb985562015-05-04 15:32:48 +03001112/*[clinic input]
1113_elementtree.Element.extend
1114
1115 elements: object
1116 /
1117
1118[clinic start generated code]*/
1119
1120static PyObject *
1121_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1122/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001123{
1124 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001125 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001126
Serhiy Storchakacb985562015-05-04 15:32:48 +03001127 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128 if (!seq) {
1129 PyErr_Format(
1130 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001131 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001132 );
1133 return NULL;
1134 }
1135
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001136 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001137 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001138 Py_INCREF(element);
1139 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001140 PyErr_Format(
1141 PyExc_TypeError,
1142 "expected an Element, not \"%.200s\"",
1143 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001144 Py_DECREF(seq);
1145 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001146 return NULL;
1147 }
1148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 if (element_add_subelement(self, element) < 0) {
1150 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001151 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001152 return NULL;
1153 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001154 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001155 }
1156
1157 Py_DECREF(seq);
1158
1159 Py_RETURN_NONE;
1160}
1161
Serhiy Storchakacb985562015-05-04 15:32:48 +03001162/*[clinic input]
1163_elementtree.Element.find
1164
1165 path: object
1166 namespaces: object = None
1167
1168[clinic start generated code]*/
1169
1170static PyObject *
1171_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1172 PyObject *namespaces)
1173/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001175 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001176 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001177
Serhiy Storchakacb985562015-05-04 15:32:48 +03001178 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001179 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001180 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001181 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001182 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001183 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184
1185 if (!self->extra)
1186 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001187
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001188 for (i = 0; i < self->extra->length; i++) {
1189 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001190 int rc;
1191 if (!Element_CheckExact(item))
1192 continue;
1193 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001194 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001195 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001196 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001197 Py_DECREF(item);
1198 if (rc < 0)
1199 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001200 }
1201
1202 Py_RETURN_NONE;
1203}
1204
Serhiy Storchakacb985562015-05-04 15:32:48 +03001205/*[clinic input]
1206_elementtree.Element.findtext
1207
1208 path: object
1209 default: object = None
1210 namespaces: object = None
1211
1212[clinic start generated code]*/
1213
1214static PyObject *
1215_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1216 PyObject *default_value,
1217 PyObject *namespaces)
1218/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001220 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001221 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001222 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001223
Serhiy Storchakacb985562015-05-04 15:32:48 +03001224 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001225 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001226 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001227 );
1228
1229 if (!self->extra) {
1230 Py_INCREF(default_value);
1231 return default_value;
1232 }
1233
1234 for (i = 0; i < self->extra->length; i++) {
1235 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001236 int rc;
1237 if (!Element_CheckExact(item))
1238 continue;
1239 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001240 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001241 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 if (text == Py_None) {
1244 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001245 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001246 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001247 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001248 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001249 return text;
1250 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001251 Py_DECREF(item);
1252 if (rc < 0)
1253 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 }
1255
1256 Py_INCREF(default_value);
1257 return default_value;
1258}
1259
Serhiy Storchakacb985562015-05-04 15:32:48 +03001260/*[clinic input]
1261_elementtree.Element.findall
1262
1263 path: object
1264 namespaces: object = None
1265
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1270 PyObject *namespaces)
1271/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001272{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001273 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001275 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001276 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001278 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001279 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001280 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001281 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001283 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284
1285 out = PyList_New(0);
1286 if (!out)
1287 return NULL;
1288
1289 if (!self->extra)
1290 return out;
1291
1292 for (i = 0; i < self->extra->length; i++) {
1293 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001294 int rc;
1295 if (!Element_CheckExact(item))
1296 continue;
1297 Py_INCREF(item);
1298 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1299 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1300 Py_DECREF(item);
1301 Py_DECREF(out);
1302 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001303 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001304 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 }
1306
1307 return out;
1308}
1309
Serhiy Storchakacb985562015-05-04 15:32:48 +03001310/*[clinic input]
1311_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001312
Serhiy Storchakacb985562015-05-04 15:32:48 +03001313 path: object
1314 namespaces: object = None
1315
1316[clinic start generated code]*/
1317
1318static PyObject *
1319_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1320 PyObject *namespaces)
1321/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1322{
1323 PyObject* tag = path;
1324 _Py_IDENTIFIER(iterfind);
1325 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001326
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001327 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001328 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001329}
1330
Serhiy Storchakacb985562015-05-04 15:32:48 +03001331/*[clinic input]
1332_elementtree.Element.get
1333
1334 key: object
1335 default: object = None
1336
1337[clinic start generated code]*/
1338
1339static PyObject *
1340_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1341 PyObject *default_value)
1342/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001343{
1344 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001345
1346 if (!self->extra || self->extra->attrib == Py_None)
1347 value = default_value;
1348 else {
1349 value = PyDict_GetItem(self->extra->attrib, key);
1350 if (!value)
1351 value = default_value;
1352 }
1353
1354 Py_INCREF(value);
1355 return value;
1356}
1357
Serhiy Storchakacb985562015-05-04 15:32:48 +03001358/*[clinic input]
1359_elementtree.Element.getchildren
1360
1361[clinic start generated code]*/
1362
1363static PyObject *
1364_elementtree_Element_getchildren_impl(ElementObject *self)
1365/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001366{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001367 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001368 PyObject* list;
1369
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001370 /* FIXME: report as deprecated? */
1371
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001372 if (!self->extra)
1373 return PyList_New(0);
1374
1375 list = PyList_New(self->extra->length);
1376 if (!list)
1377 return NULL;
1378
1379 for (i = 0; i < self->extra->length; i++) {
1380 PyObject* item = self->extra->children[i];
1381 Py_INCREF(item);
1382 PyList_SET_ITEM(list, i, item);
1383 }
1384
1385 return list;
1386}
1387
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001388
Eli Bendersky64d11e62012-06-15 07:42:50 +03001389static PyObject *
1390create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1391
1392
Serhiy Storchakacb985562015-05-04 15:32:48 +03001393/*[clinic input]
1394_elementtree.Element.iter
1395
1396 tag: object = None
1397
1398[clinic start generated code]*/
1399
Eli Bendersky64d11e62012-06-15 07:42:50 +03001400static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001401_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1402/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001404 if (PyUnicode_Check(tag)) {
1405 if (PyUnicode_READY(tag) < 0)
1406 return NULL;
1407 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1408 tag = Py_None;
1409 }
1410 else if (PyBytes_Check(tag)) {
1411 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1412 tag = Py_None;
1413 }
1414
Eli Bendersky64d11e62012-06-15 07:42:50 +03001415 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001416}
1417
1418
Serhiy Storchakacb985562015-05-04 15:32:48 +03001419/*[clinic input]
1420_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001421
Serhiy Storchakacb985562015-05-04 15:32:48 +03001422[clinic start generated code]*/
1423
1424static PyObject *
1425_elementtree_Element_itertext_impl(ElementObject *self)
1426/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1427{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001428 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001429}
1430
Eli Bendersky64d11e62012-06-15 07:42:50 +03001431
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001433element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001435 ElementObject* self = (ElementObject*) self_;
1436
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001437 if (!self->extra || index < 0 || index >= self->extra->length) {
1438 PyErr_SetString(
1439 PyExc_IndexError,
1440 "child index out of range"
1441 );
1442 return NULL;
1443 }
1444
1445 Py_INCREF(self->extra->children[index]);
1446 return self->extra->children[index];
1447}
1448
Serhiy Storchakacb985562015-05-04 15:32:48 +03001449/*[clinic input]
1450_elementtree.Element.insert
1451
1452 index: Py_ssize_t
1453 subelement: object(subclass_of='&Element_Type')
1454 /
1455
1456[clinic start generated code]*/
1457
1458static PyObject *
1459_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1460 PyObject *subelement)
1461/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001463 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464
Victor Stinner5f0af232013-07-11 23:01:36 +02001465 if (!self->extra) {
1466 if (create_extra(self, NULL) < 0)
1467 return NULL;
1468 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001470 if (index < 0) {
1471 index += self->extra->length;
1472 if (index < 0)
1473 index = 0;
1474 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001475 if (index > self->extra->length)
1476 index = self->extra->length;
1477
1478 if (element_resize(self, 1) < 0)
1479 return NULL;
1480
1481 for (i = self->extra->length; i > index; i--)
1482 self->extra->children[i] = self->extra->children[i-1];
1483
Serhiy Storchakacb985562015-05-04 15:32:48 +03001484 Py_INCREF(subelement);
1485 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486
1487 self->extra->length++;
1488
1489 Py_RETURN_NONE;
1490}
1491
Serhiy Storchakacb985562015-05-04 15:32:48 +03001492/*[clinic input]
1493_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494
Serhiy Storchakacb985562015-05-04 15:32:48 +03001495[clinic start generated code]*/
1496
1497static PyObject *
1498_elementtree_Element_items_impl(ElementObject *self)
1499/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1500{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001501 if (!self->extra || self->extra->attrib == Py_None)
1502 return PyList_New(0);
1503
1504 return PyDict_Items(self->extra->attrib);
1505}
1506
Serhiy Storchakacb985562015-05-04 15:32:48 +03001507/*[clinic input]
1508_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001509
Serhiy Storchakacb985562015-05-04 15:32:48 +03001510[clinic start generated code]*/
1511
1512static PyObject *
1513_elementtree_Element_keys_impl(ElementObject *self)
1514/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1515{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516 if (!self->extra || self->extra->attrib == Py_None)
1517 return PyList_New(0);
1518
1519 return PyDict_Keys(self->extra->attrib);
1520}
1521
Martin v. Löwis18e16552006-02-15 17:27:45 +00001522static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001523element_length(ElementObject* self)
1524{
1525 if (!self->extra)
1526 return 0;
1527
1528 return self->extra->length;
1529}
1530
Serhiy Storchakacb985562015-05-04 15:32:48 +03001531/*[clinic input]
1532_elementtree.Element.makeelement
1533
1534 tag: object
1535 attrib: object
1536 /
1537
1538[clinic start generated code]*/
1539
1540static PyObject *
1541_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1542 PyObject *attrib)
1543/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544{
1545 PyObject* elem;
1546
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001547 attrib = PyDict_Copy(attrib);
1548 if (!attrib)
1549 return NULL;
1550
Eli Bendersky092af1f2012-03-04 07:14:03 +02001551 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552
1553 Py_DECREF(attrib);
1554
1555 return elem;
1556}
1557
Serhiy Storchakacb985562015-05-04 15:32:48 +03001558/*[clinic input]
1559_elementtree.Element.remove
1560
1561 subelement: object(subclass_of='&Element_Type')
1562 /
1563
1564[clinic start generated code]*/
1565
1566static PyObject *
1567_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1568/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001569{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001570 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001571 int rc;
1572 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001574 if (!self->extra) {
1575 /* element has no children, so raise exception */
1576 PyErr_SetString(
1577 PyExc_ValueError,
1578 "list.remove(x): x not in list"
1579 );
1580 return NULL;
1581 }
1582
1583 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001584 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001585 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001586 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001587 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001588 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001589 if (rc < 0)
1590 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591 }
1592
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001593 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001594 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595 PyErr_SetString(
1596 PyExc_ValueError,
1597 "list.remove(x): x not in list"
1598 );
1599 return NULL;
1600 }
1601
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001602 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001603
1604 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605 for (; i < self->extra->length; i++)
1606 self->extra->children[i] = self->extra->children[i+1];
1607
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001608 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001609 Py_RETURN_NONE;
1610}
1611
1612static PyObject*
1613element_repr(ElementObject* self)
1614{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001615 if (self->tag)
1616 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1617 else
1618 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619}
1620
Serhiy Storchakacb985562015-05-04 15:32:48 +03001621/*[clinic input]
1622_elementtree.Element.set
1623
1624 key: object
1625 value: object
1626 /
1627
1628[clinic start generated code]*/
1629
1630static PyObject *
1631_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1632 PyObject *value)
1633/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001634{
1635 PyObject* attrib;
1636
Victor Stinner5f0af232013-07-11 23:01:36 +02001637 if (!self->extra) {
1638 if (create_extra(self, NULL) < 0)
1639 return NULL;
1640 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001641
1642 attrib = element_get_attrib(self);
1643 if (!attrib)
1644 return NULL;
1645
1646 if (PyDict_SetItem(attrib, key, value) < 0)
1647 return NULL;
1648
1649 Py_RETURN_NONE;
1650}
1651
1652static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001653element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001654{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001655 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001656 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001657 PyObject* old;
1658
1659 if (!self->extra || index < 0 || index >= self->extra->length) {
1660 PyErr_SetString(
1661 PyExc_IndexError,
1662 "child assignment index out of range");
1663 return -1;
1664 }
1665
1666 old = self->extra->children[index];
1667
1668 if (item) {
1669 Py_INCREF(item);
1670 self->extra->children[index] = item;
1671 } else {
1672 self->extra->length--;
1673 for (i = index; i < self->extra->length; i++)
1674 self->extra->children[i] = self->extra->children[i+1];
1675 }
1676
1677 Py_DECREF(old);
1678
1679 return 0;
1680}
1681
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001682static PyObject*
1683element_subscr(PyObject* self_, PyObject* item)
1684{
1685 ElementObject* self = (ElementObject*) self_;
1686
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001687 if (PyIndex_Check(item)) {
1688 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001689
1690 if (i == -1 && PyErr_Occurred()) {
1691 return NULL;
1692 }
1693 if (i < 0 && self->extra)
1694 i += self->extra->length;
1695 return element_getitem(self_, i);
1696 }
1697 else if (PySlice_Check(item)) {
1698 Py_ssize_t start, stop, step, slicelen, cur, i;
1699 PyObject* list;
1700
1701 if (!self->extra)
1702 return PyList_New(0);
1703
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001704 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001705 self->extra->length,
1706 &start, &stop, &step, &slicelen) < 0) {
1707 return NULL;
1708 }
1709
1710 if (slicelen <= 0)
1711 return PyList_New(0);
1712 else {
1713 list = PyList_New(slicelen);
1714 if (!list)
1715 return NULL;
1716
1717 for (cur = start, i = 0; i < slicelen;
1718 cur += step, i++) {
1719 PyObject* item = self->extra->children[cur];
1720 Py_INCREF(item);
1721 PyList_SET_ITEM(list, i, item);
1722 }
1723
1724 return list;
1725 }
1726 }
1727 else {
1728 PyErr_SetString(PyExc_TypeError,
1729 "element indices must be integers");
1730 return NULL;
1731 }
1732}
1733
1734static int
1735element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1736{
1737 ElementObject* self = (ElementObject*) self_;
1738
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001739 if (PyIndex_Check(item)) {
1740 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001741
1742 if (i == -1 && PyErr_Occurred()) {
1743 return -1;
1744 }
1745 if (i < 0 && self->extra)
1746 i += self->extra->length;
1747 return element_setitem(self_, i, value);
1748 }
1749 else if (PySlice_Check(item)) {
1750 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1751
1752 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001753 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001754
Victor Stinner5f0af232013-07-11 23:01:36 +02001755 if (!self->extra) {
1756 if (create_extra(self, NULL) < 0)
1757 return -1;
1758 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001759
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001760 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001761 self->extra->length,
1762 &start, &stop, &step, &slicelen) < 0) {
1763 return -1;
1764 }
1765
Eli Bendersky865756a2012-03-09 13:38:15 +02001766 if (value == NULL) {
1767 /* Delete slice */
1768 size_t cur;
1769 Py_ssize_t i;
1770
1771 if (slicelen <= 0)
1772 return 0;
1773
1774 /* Since we're deleting, the direction of the range doesn't matter,
1775 * so for simplicity make it always ascending.
1776 */
1777 if (step < 0) {
1778 stop = start + 1;
1779 start = stop + step * (slicelen - 1) - 1;
1780 step = -step;
1781 }
1782
1783 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1784
1785 /* recycle is a list that will contain all the children
1786 * scheduled for removal.
1787 */
1788 if (!(recycle = PyList_New(slicelen))) {
1789 PyErr_NoMemory();
1790 return -1;
1791 }
1792
1793 /* This loop walks over all the children that have to be deleted,
1794 * with cur pointing at them. num_moved is the amount of children
1795 * until the next deleted child that have to be "shifted down" to
1796 * occupy the deleted's places.
1797 * Note that in the ith iteration, shifting is done i+i places down
1798 * because i children were already removed.
1799 */
1800 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1801 /* Compute how many children have to be moved, clipping at the
1802 * list end.
1803 */
1804 Py_ssize_t num_moved = step - 1;
1805 if (cur + step >= (size_t)self->extra->length) {
1806 num_moved = self->extra->length - cur - 1;
1807 }
1808
1809 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1810
1811 memmove(
1812 self->extra->children + cur - i,
1813 self->extra->children + cur + 1,
1814 num_moved * sizeof(PyObject *));
1815 }
1816
1817 /* Leftover "tail" after the last removed child */
1818 cur = start + (size_t)slicelen * step;
1819 if (cur < (size_t)self->extra->length) {
1820 memmove(
1821 self->extra->children + cur - slicelen,
1822 self->extra->children + cur,
1823 (self->extra->length - cur) * sizeof(PyObject *));
1824 }
1825
1826 self->extra->length -= slicelen;
1827
1828 /* Discard the recycle list with all the deleted sub-elements */
1829 Py_XDECREF(recycle);
1830 return 0;
1831 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001832
1833 /* A new slice is actually being assigned */
1834 seq = PySequence_Fast(value, "");
1835 if (!seq) {
1836 PyErr_Format(
1837 PyExc_TypeError,
1838 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1839 );
1840 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001842 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001843
1844 if (step != 1 && newlen != slicelen)
1845 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001846 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001847 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001848 "attempt to assign sequence of size %zd "
1849 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001850 newlen, slicelen
1851 );
1852 return -1;
1853 }
1854
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 /* Resize before creating the recycle bin, to prevent refleaks. */
1856 if (newlen > slicelen) {
1857 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001858 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 return -1;
1860 }
1861 }
1862
1863 if (slicelen > 0) {
1864 /* to avoid recursive calls to this method (via decref), move
1865 old items to the recycle bin here, and get rid of them when
1866 we're done modifying the element */
1867 recycle = PyList_New(slicelen);
1868 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870 return -1;
1871 }
1872 for (cur = start, i = 0; i < slicelen;
1873 cur += step, i++)
1874 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1875 }
1876
1877 if (newlen < slicelen) {
1878 /* delete slice */
1879 for (i = stop; i < self->extra->length; i++)
1880 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1881 } else if (newlen > slicelen) {
1882 /* insert slice */
1883 for (i = self->extra->length-1; i >= stop; i--)
1884 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1885 }
1886
1887 /* replace the slice */
1888 for (cur = start, i = 0; i < newlen;
1889 cur += step, i++) {
1890 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1891 Py_INCREF(element);
1892 self->extra->children[cur] = element;
1893 }
1894
1895 self->extra->length += newlen - slicelen;
1896
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001897 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001898
1899 /* discard the recycle bin, and everything in it */
1900 Py_XDECREF(recycle);
1901
1902 return 0;
1903 }
1904 else {
1905 PyErr_SetString(PyExc_TypeError,
1906 "element indices must be integers");
1907 return -1;
1908 }
1909}
1910
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001911static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001912element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001913{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001914 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001915 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001916 return res;
1917}
1918
Serhiy Storchakadde08152015-11-25 15:28:13 +02001919static PyObject*
1920element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001921{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001922 PyObject *res = element_get_text(self);
1923 Py_XINCREF(res);
1924 return res;
1925}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001926
Serhiy Storchakadde08152015-11-25 15:28:13 +02001927static PyObject*
1928element_tail_getter(ElementObject *self, void *closure)
1929{
1930 PyObject *res = element_get_tail(self);
1931 Py_XINCREF(res);
1932 return res;
1933}
1934
1935static PyObject*
1936element_attrib_getter(ElementObject *self, void *closure)
1937{
1938 PyObject *res;
1939 if (!self->extra) {
1940 if (create_extra(self, NULL) < 0)
1941 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001942 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001943 res = element_get_attrib(self);
1944 Py_XINCREF(res);
1945 return res;
1946}
Victor Stinner4d463432013-07-11 23:05:03 +02001947
/* Macro for setter validation: a NULL value means "delete the attribute",
 * which is not supported.  Sets AttributeError and makes the *enclosing
 * function* return -1 in that case.  Wrapped in do { } while (0) so that it
 * behaves as a single statement wherever it is used (followed by ';').
 */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1956
Serhiy Storchakadde08152015-11-25 15:28:13 +02001957static int
1958element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1959{
1960 _VALIDATE_ATTR_VALUE(value);
1961 Py_INCREF(value);
Serhiy Storchaka726fc132015-12-27 15:44:33 +02001962 Py_SETREF(self->tag, value);
Serhiy Storchakadde08152015-11-25 15:28:13 +02001963 return 0;
1964}
1965
1966static int
1967element_text_setter(ElementObject *self, PyObject *value, void *closure)
1968{
1969 _VALIDATE_ATTR_VALUE(value);
1970 Py_INCREF(value);
1971 Py_DECREF(JOIN_OBJ(self->text));
1972 self->text = value;
1973 return 0;
1974}
1975
1976static int
1977element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1978{
1979 _VALIDATE_ATTR_VALUE(value);
1980 Py_INCREF(value);
1981 Py_DECREF(JOIN_OBJ(self->tail));
1982 self->tail = value;
1983 return 0;
1984}
1985
1986static int
1987element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1988{
1989 _VALIDATE_ATTR_VALUE(value);
1990 if (!self->extra) {
1991 if (create_extra(self, NULL) < 0)
1992 return -1;
1993 }
1994 Py_INCREF(value);
Serhiy Storchaka726fc132015-12-27 15:44:33 +02001995 Py_SETREF(self->extra->attrib, value);
Eli Benderskyef9683b2013-05-18 07:52:34 -07001996 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001997}
1998
1999static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002000 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002001 0, /* sq_concat */
2002 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002003 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002004 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002005 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002006 0,
2007};
2008
Eli Bendersky64d11e62012-06-15 07:42:50 +03002009/******************************* Element iterator ****************************/
2010
2011/* ElementIterObject represents the iteration state over an XML element in
2012 * pre-order traversal. To keep track of which sub-element should be returned
2013 * next, a stack of parents is maintained. This is a standard stack-based
2014 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2016 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002017 * the current one is exhausted, and the next child to examine in that parent.
2018 */
2019typedef struct ParentLocator_t {
2020 ElementObject *parent;
2021 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002022} ParentLocator;
2023
2024typedef struct {
2025 PyObject_HEAD
2026 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002027 Py_ssize_t parent_stack_used;
2028 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002029 ElementObject *root_element;
2030 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002031 int gettext;
2032} ElementIterObject;
2033
2034
2035static void
2036elementiter_dealloc(ElementIterObject *it)
2037{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002038 Py_ssize_t i = it->parent_stack_used;
2039 it->parent_stack_used = 0;
2040 while (i--)
2041 Py_XDECREF(it->parent_stack[i].parent);
2042 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002043
2044 Py_XDECREF(it->sought_tag);
2045 Py_XDECREF(it->root_element);
2046
2047 PyObject_GC_UnTrack(it);
2048 PyObject_GC_Del(it);
2049}
2050
2051static int
2052elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2053{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002054 Py_ssize_t i = it->parent_stack_used;
2055 while (i--)
2056 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002057
2058 Py_VISIT(it->root_element);
2059 Py_VISIT(it->sought_tag);
2060 return 0;
2061}
2062
2063/* Helper function for elementiter_next. Add a new parent to the parent stack.
2064 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002065static int
2066parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002067{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002068 ParentLocator *item;
2069
2070 if (it->parent_stack_used >= it->parent_stack_size) {
2071 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2072 ParentLocator *parent_stack = it->parent_stack;
2073 PyMem_Resize(parent_stack, ParentLocator, new_size);
2074 if (parent_stack == NULL)
2075 return -1;
2076 it->parent_stack = parent_stack;
2077 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002078 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002079 item = it->parent_stack + it->parent_stack_used++;
2080 Py_INCREF(parent);
2081 item->parent = parent;
2082 item->child_index = 0;
2083 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002084}
2085
2086static PyObject *
2087elementiter_next(ElementIterObject *it)
2088{
2089 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002090 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002091 * A short note on gettext: this function serves both the iter() and
2092 * itertext() methods to avoid code duplication. However, there are a few
2093 * small differences in the way these iterations work. Namely:
2094 * - itertext() only yields text from nodes that have it, and continues
2095 * iterating when a node doesn't have text (so it doesn't return any
2096 * node like iter())
2097 * - itertext() also has to handle tail, after finishing with all the
2098 * children of a node.
2099 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002100 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002101 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002102 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002103
2104 while (1) {
2105 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002106 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002107 * iterator is exhausted.
2108 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002109 if (!it->parent_stack_used) {
2110 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111 PyErr_SetNone(PyExc_StopIteration);
2112 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002113 }
2114
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002115 elem = it->root_element; /* steals a reference */
2116 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002117 }
2118 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002119 /* See if there are children left to traverse in the current parent. If
2120 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002122 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2123 Py_ssize_t child_index = item->child_index;
2124 ElementObjectExtra *extra;
2125 elem = item->parent;
2126 extra = elem->extra;
2127 if (!extra || child_index >= extra->length) {
2128 it->parent_stack_used--;
2129 /* Note that extra condition on it->parent_stack_used here;
2130 * this is because itertext() is supposed to only return *inner*
2131 * text, not text following the element it began iteration with.
2132 */
2133 if (it->gettext && it->parent_stack_used) {
2134 text = element_get_tail(elem);
2135 goto gettext;
2136 }
2137 Py_DECREF(elem);
2138 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002139 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002140
2141 elem = (ElementObject *)extra->children[child_index];
2142 item->child_index++;
2143 Py_INCREF(elem);
2144 }
2145
2146 if (parent_stack_push_new(it, elem) < 0) {
2147 Py_DECREF(elem);
2148 PyErr_NoMemory();
2149 return NULL;
2150 }
2151 if (it->gettext) {
2152 text = element_get_text(elem);
2153 goto gettext;
2154 }
2155
2156 if (it->sought_tag == Py_None)
2157 return (PyObject *)elem;
2158
2159 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2160 if (rc > 0)
2161 return (PyObject *)elem;
2162
2163 Py_DECREF(elem);
2164 if (rc < 0)
2165 return NULL;
2166 continue;
2167
2168gettext:
2169 if (!text) {
2170 Py_DECREF(elem);
2171 return NULL;
2172 }
2173 if (text == Py_None) {
2174 Py_DECREF(elem);
2175 }
2176 else {
2177 Py_INCREF(text);
2178 Py_DECREF(elem);
2179 rc = PyObject_IsTrue(text);
2180 if (rc > 0)
2181 return text;
2182 Py_DECREF(text);
2183 if (rc < 0)
2184 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002185 }
2186 }
2187
2188 return NULL;
2189}
2190
2191
2192static PyTypeObject ElementIter_Type = {
2193 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002194 /* Using the module's name since the pure-Python implementation does not
2195 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002196 "_elementtree._element_iterator", /* tp_name */
2197 sizeof(ElementIterObject), /* tp_basicsize */
2198 0, /* tp_itemsize */
2199 /* methods */
2200 (destructor)elementiter_dealloc, /* tp_dealloc */
2201 0, /* tp_print */
2202 0, /* tp_getattr */
2203 0, /* tp_setattr */
2204 0, /* tp_reserved */
2205 0, /* tp_repr */
2206 0, /* tp_as_number */
2207 0, /* tp_as_sequence */
2208 0, /* tp_as_mapping */
2209 0, /* tp_hash */
2210 0, /* tp_call */
2211 0, /* tp_str */
2212 0, /* tp_getattro */
2213 0, /* tp_setattro */
2214 0, /* tp_as_buffer */
2215 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2216 0, /* tp_doc */
2217 (traverseproc)elementiter_traverse, /* tp_traverse */
2218 0, /* tp_clear */
2219 0, /* tp_richcompare */
2220 0, /* tp_weaklistoffset */
2221 PyObject_SelfIter, /* tp_iter */
2222 (iternextfunc)elementiter_next, /* tp_iternext */
2223 0, /* tp_methods */
2224 0, /* tp_members */
2225 0, /* tp_getset */
2226 0, /* tp_base */
2227 0, /* tp_dict */
2228 0, /* tp_descr_get */
2229 0, /* tp_descr_set */
2230 0, /* tp_dictoffset */
2231 0, /* tp_init */
2232 0, /* tp_alloc */
2233 0, /* tp_new */
2234};
2235
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002236#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002237
2238static PyObject *
2239create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2240{
2241 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002242
2243 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2244 if (!it)
2245 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002246
Victor Stinner4d463432013-07-11 23:05:03 +02002247 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002248 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002249 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002250 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002251 it->root_element = self;
2252
Eli Bendersky64d11e62012-06-15 07:42:50 +03002253 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002254
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002255 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002256 if (it->parent_stack == NULL) {
2257 Py_DECREF(it);
2258 PyErr_NoMemory();
2259 return NULL;
2260 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002261 it->parent_stack_used = 0;
2262 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002263
Eli Bendersky64d11e62012-06-15 07:42:50 +03002264 return (PyObject *)it;
2265}
2266
2267
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002268/* ==================================================================== */
2269/* the tree builder type */
2270
2271typedef struct {
2272 PyObject_HEAD
2273
Eli Bendersky58d548d2012-05-29 15:45:16 +03002274 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002275
Antoine Pitrouee329312012-10-04 19:53:29 +02002276 PyObject *this; /* current node */
2277 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278
Eli Bendersky58d548d2012-05-29 15:45:16 +03002279 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002280
Eli Bendersky58d548d2012-05-29 15:45:16 +03002281 PyObject *stack; /* element stack */
2282 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002283
Eli Bendersky48d358b2012-05-30 17:57:50 +03002284 PyObject *element_factory;
2285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002286 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002287 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002288 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2289 PyObject *end_event_obj;
2290 PyObject *start_ns_event_obj;
2291 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002292} TreeBuilderObject;
2293
/* True when op's type is exactly TreeBuilder_Type (subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002295
2296/* -------------------------------------------------------------------- */
2297/* constructor and destructor */
2298
Eli Bendersky58d548d2012-05-29 15:45:16 +03002299static PyObject *
2300treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002301{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002302 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2303 if (t != NULL) {
2304 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002307 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002308 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002309 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310
Eli Bendersky58d548d2012-05-29 15:45:16 +03002311 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002312 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002313 t->stack = PyList_New(20);
2314 if (!t->stack) {
2315 Py_DECREF(t->this);
2316 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002317 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002318 return NULL;
2319 }
2320 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002322 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 t->start_event_obj = t->end_event_obj = NULL;
2324 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2325 }
2326 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002327}
2328
Serhiy Storchakacb985562015-05-04 15:32:48 +03002329/*[clinic input]
2330_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002331
Serhiy Storchakacb985562015-05-04 15:32:48 +03002332 element_factory: object = NULL
2333
2334[clinic start generated code]*/
2335
2336static int
2337_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2338 PyObject *element_factory)
2339/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2340{
2341 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002342
2343 if (element_factory) {
2344 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002345 tmp = self->element_factory;
2346 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002347 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002348 }
2349
Eli Bendersky58d548d2012-05-29 15:45:16 +03002350 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002351}
2352
Eli Bendersky48d358b2012-05-30 17:57:50 +03002353static int
2354treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2355{
2356 Py_VISIT(self->root);
2357 Py_VISIT(self->this);
2358 Py_VISIT(self->last);
2359 Py_VISIT(self->data);
2360 Py_VISIT(self->stack);
2361 Py_VISIT(self->element_factory);
2362 return 0;
2363}
2364
2365static int
2366treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002367{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002368 Py_CLEAR(self->end_ns_event_obj);
2369 Py_CLEAR(self->start_ns_event_obj);
2370 Py_CLEAR(self->end_event_obj);
2371 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002372 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002373 Py_CLEAR(self->stack);
2374 Py_CLEAR(self->data);
2375 Py_CLEAR(self->last);
2376 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002377 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002378 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002379 return 0;
2380}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002381
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382static void
2383treebuilder_dealloc(TreeBuilderObject *self)
2384{
2385 PyObject_GC_UnTrack(self);
2386 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002387 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388}
2389
2390/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002391/* helpers for handling of arbitrary element-like objects */
2392
2393static int
2394treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2395 PyObject **dest, _Py_Identifier *name)
2396{
2397 if (Element_CheckExact(element)) {
2398 Py_DECREF(JOIN_OBJ(*dest));
2399 *dest = JOIN_SET(data, PyList_CheckExact(data));
2400 return 0;
2401 }
2402 else {
2403 PyObject *joined = list_join(data);
2404 int r;
2405 if (joined == NULL)
2406 return -1;
2407 r = _PyObject_SetAttrId(element, name, joined);
2408 Py_DECREF(joined);
2409 return r;
2410 }
2411}
2412
2413/* These two functions steal a reference to data */
2414static int
2415treebuilder_set_element_text(PyObject *element, PyObject *data)
2416{
2417 _Py_IDENTIFIER(text);
2418 return treebuilder_set_element_text_or_tail(
2419 element, data, &((ElementObject *) element)->text, &PyId_text);
2420}
2421
2422static int
2423treebuilder_set_element_tail(PyObject *element, PyObject *data)
2424{
2425 _Py_IDENTIFIER(tail);
2426 return treebuilder_set_element_text_or_tail(
2427 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2428}
2429
2430static int
2431treebuilder_add_subelement(PyObject *element, PyObject *child)
2432{
2433 _Py_IDENTIFIER(append);
2434 if (Element_CheckExact(element)) {
2435 ElementObject *elem = (ElementObject *) element;
2436 return element_add_subelement(elem, child);
2437 }
2438 else {
2439 PyObject *res;
2440 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2441 if (res == NULL)
2442 return -1;
2443 Py_DECREF(res);
2444 return 0;
2445 }
2446}
2447
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002448LOCAL(int)
2449treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2450 PyObject *node)
2451{
2452 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002453 PyObject *res;
2454 PyObject *event = PyTuple_Pack(2, action, node);
2455 if (event == NULL)
2456 return -1;
2457 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2458 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002459 if (res == NULL)
2460 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002461 Py_DECREF(res);
2462 }
2463 return 0;
2464}
2465
Antoine Pitrouee329312012-10-04 19:53:29 +02002466/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002467/* handlers */
2468
2469LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002470treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2471 PyObject* attrib)
2472{
2473 PyObject* node;
2474 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002475 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476
2477 if (self->data) {
2478 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002479 if (treebuilder_set_element_text(self->last, self->data))
2480 return NULL;
2481 }
2482 else {
2483 if (treebuilder_set_element_tail(self->last, self->data))
2484 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002485 }
2486 self->data = NULL;
2487 }
2488
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002489 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002490 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002491 } else if (attrib == Py_None) {
2492 attrib = PyDict_New();
2493 if (!attrib)
2494 return NULL;
2495 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2496 Py_DECREF(attrib);
2497 }
2498 else {
2499 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002500 }
2501 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002503 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504
Antoine Pitrouee329312012-10-04 19:53:29 +02002505 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506
2507 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002508 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002509 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510 } else {
2511 if (self->root) {
2512 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002513 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514 "multiple elements on top level"
2515 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002516 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 }
2518 Py_INCREF(node);
2519 self->root = node;
2520 }
2521
2522 if (self->index < PyList_GET_SIZE(self->stack)) {
2523 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002524 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525 Py_INCREF(this);
2526 } else {
2527 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002528 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 }
2530 self->index++;
2531
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002532 Py_INCREF(node);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002533 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 Py_INCREF(node);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002535 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002537 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2538 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539
2540 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002541
2542 error:
2543 Py_DECREF(node);
2544 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002545}
2546
2547LOCAL(PyObject*)
2548treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2549{
2550 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002551 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002552 /* ignore calls to data before the first call to start */
2553 Py_RETURN_NONE;
2554 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 /* store the first item as is */
2556 Py_INCREF(data); self->data = data;
2557 } else {
2558 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002559 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2560 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002561 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 /* expat often generates single character data sections; handle
2563 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002564 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2565 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002567 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568 } else if (PyList_CheckExact(self->data)) {
2569 if (PyList_Append(self->data, data) < 0)
2570 return NULL;
2571 } else {
2572 PyObject* list = PyList_New(2);
2573 if (!list)
2574 return NULL;
2575 PyList_SET_ITEM(list, 0, self->data);
2576 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2577 self->data = list;
2578 }
2579 }
2580
2581 Py_RETURN_NONE;
2582}
2583
2584LOCAL(PyObject*)
2585treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2586{
2587 PyObject* item;
2588
2589 if (self->data) {
2590 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002591 if (treebuilder_set_element_text(self->last, self->data))
2592 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002593 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002594 if (treebuilder_set_element_tail(self->last, self->data))
2595 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 }
2597 self->data = NULL;
2598 }
2599
2600 if (self->index == 0) {
2601 PyErr_SetString(
2602 PyExc_IndexError,
2603 "pop from empty stack"
2604 );
2605 return NULL;
2606 }
2607
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002608 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002609 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002610 self->index--;
2611 self->this = PyList_GET_ITEM(self->stack, self->index);
2612 Py_INCREF(self->this);
2613 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002614
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002615 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617
2618 Py_INCREF(self->last);
2619 return (PyObject*) self->last;
2620}
2621
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002622/* -------------------------------------------------------------------- */
2623/* methods (in alphabetical order) */
2624
Serhiy Storchakacb985562015-05-04 15:32:48 +03002625/*[clinic input]
2626_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627
Serhiy Storchakacb985562015-05-04 15:32:48 +03002628 data: object
2629 /
2630
2631[clinic start generated code]*/
2632
2633static PyObject *
2634_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2635/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2636{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002637 return treebuilder_handle_data(self, data);
2638}
2639
Serhiy Storchakacb985562015-05-04 15:32:48 +03002640/*[clinic input]
2641_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002642
Serhiy Storchakacb985562015-05-04 15:32:48 +03002643 tag: object
2644 /
2645
2646[clinic start generated code]*/
2647
2648static PyObject *
2649_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2650/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2651{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652 return treebuilder_handle_end(self, tag);
2653}
2654
2655LOCAL(PyObject*)
2656treebuilder_done(TreeBuilderObject* self)
2657{
2658 PyObject* res;
2659
2660 /* FIXME: check stack size? */
2661
2662 if (self->root)
2663 res = self->root;
2664 else
2665 res = Py_None;
2666
2667 Py_INCREF(res);
2668 return res;
2669}
2670
Serhiy Storchakacb985562015-05-04 15:32:48 +03002671/*[clinic input]
2672_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673
Serhiy Storchakacb985562015-05-04 15:32:48 +03002674[clinic start generated code]*/
2675
2676static PyObject *
2677_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2678/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2679{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002680 return treebuilder_done(self);
2681}
2682
Serhiy Storchakacb985562015-05-04 15:32:48 +03002683/*[clinic input]
2684_elementtree.TreeBuilder.start
2685
2686 tag: object
2687 attrs: object = None
2688 /
2689
2690[clinic start generated code]*/
2691
2692static PyObject *
2693_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2694 PyObject *attrs)
2695/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002697 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698}
2699
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700/* ==================================================================== */
2701/* the expat interface */
2702
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002703#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002705
2706/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2707 * cached globally without being in per-module state.
2708 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002709static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711
Eli Bendersky52467b12012-06-01 07:13:08 +03002712static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2713 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2714
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715typedef struct {
2716 PyObject_HEAD
2717
2718 XML_Parser parser;
2719
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002720 PyObject *target;
2721 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002723 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002725 PyObject *handle_start;
2726 PyObject *handle_data;
2727 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002729 PyObject *handle_comment;
2730 PyObject *handle_pi;
2731 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002733 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002734
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735} XMLParserObject;
2736
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002737static PyObject*
2738_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2739static PyObject *
2740_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2741 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002742
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743/* helpers */
2744
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745LOCAL(PyObject*)
2746makeuniversal(XMLParserObject* self, const char* string)
2747{
2748 /* convert a UTF-8 tag/attribute name from the expat parser
2749 to a universal name string */
2750
Antoine Pitrouc1948842012-10-01 23:40:37 +02002751 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752 PyObject* key;
2753 PyObject* value;
2754
2755 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002756 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002757 if (!key)
2758 return NULL;
2759
2760 value = PyDict_GetItem(self->names, key);
2761
2762 if (value) {
2763 Py_INCREF(value);
2764 } else {
2765 /* new name. convert to universal name, and decode as
2766 necessary */
2767
2768 PyObject* tag;
2769 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002770 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771
2772 /* look for namespace separator */
2773 for (i = 0; i < size; i++)
2774 if (string[i] == '}')
2775 break;
2776 if (i != size) {
2777 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002778 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002779 if (tag == NULL) {
2780 Py_DECREF(key);
2781 return NULL;
2782 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002783 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002784 p[0] = '{';
2785 memcpy(p+1, string, size);
2786 size++;
2787 } else {
2788 /* plain name; use key as tag */
2789 Py_INCREF(key);
2790 tag = key;
2791 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002792
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002794 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002795 value = PyUnicode_DecodeUTF8(p, size, "strict");
2796 Py_DECREF(tag);
2797 if (!value) {
2798 Py_DECREF(key);
2799 return NULL;
2800 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002801
2802 /* add to names dictionary */
2803 if (PyDict_SetItem(self->names, key, value) < 0) {
2804 Py_DECREF(key);
2805 Py_DECREF(value);
2806 return NULL;
2807 }
2808 }
2809
2810 Py_DECREF(key);
2811 return value;
2812}
2813
Eli Bendersky5b77d812012-03-16 08:20:05 +02002814/* Set the ParseError exception with the given parameters.
2815 * If message is not NULL, it's used as the error string. Otherwise, the
2816 * message string is the default for the given error_code.
2817*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002818static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002819expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2820 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002821{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002822 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002823 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002825 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002826 message ? message : EXPAT(ErrorString)(error_code),
2827 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002828 if (errmsg == NULL)
2829 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002830
Eli Bendersky532d03e2013-08-10 08:00:39 -07002831 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002832 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002833 if (!error)
2834 return;
2835
Eli Bendersky5b77d812012-03-16 08:20:05 +02002836 /* Add code and position attributes */
2837 code = PyLong_FromLong((long)error_code);
2838 if (!code) {
2839 Py_DECREF(error);
2840 return;
2841 }
2842 if (PyObject_SetAttrString(error, "code", code) == -1) {
2843 Py_DECREF(error);
2844 Py_DECREF(code);
2845 return;
2846 }
2847 Py_DECREF(code);
2848
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002849 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002850 if (!position) {
2851 Py_DECREF(error);
2852 return;
2853 }
2854 if (PyObject_SetAttrString(error, "position", position) == -1) {
2855 Py_DECREF(error);
2856 Py_DECREF(position);
2857 return;
2858 }
2859 Py_DECREF(position);
2860
Eli Bendersky532d03e2013-08-10 08:00:39 -07002861 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002862 Py_DECREF(error);
2863}
2864
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002865/* -------------------------------------------------------------------- */
2866/* handlers */
2867
2868static void
2869expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2870 int data_len)
2871{
2872 PyObject* key;
2873 PyObject* value;
2874 PyObject* res;
2875
2876 if (data_len < 2 || data_in[0] != '&')
2877 return;
2878
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002879 if (PyErr_Occurred())
2880 return;
2881
Neal Norwitz0269b912007-08-08 06:56:02 +00002882 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002883 if (!key)
2884 return;
2885
2886 value = PyDict_GetItem(self->entity, key);
2887
2888 if (value) {
2889 if (TreeBuilder_CheckExact(self->target))
2890 res = treebuilder_handle_data(
2891 (TreeBuilderObject*) self->target, value
2892 );
2893 else if (self->handle_data)
2894 res = PyObject_CallFunction(self->handle_data, "O", value);
2895 else
2896 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002897 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002898 } else if (!PyErr_Occurred()) {
2899 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002900 char message[128] = "undefined entity ";
2901 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002902 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002903 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002905 EXPAT(GetErrorColumnNumber)(self->parser),
2906 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907 );
2908 }
2909
2910 Py_DECREF(key);
2911}
2912
2913static void
2914expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2915 const XML_Char **attrib_in)
2916{
2917 PyObject* res;
2918 PyObject* tag;
2919 PyObject* attrib;
2920 int ok;
2921
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002922 if (PyErr_Occurred())
2923 return;
2924
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 /* tag name */
2926 tag = makeuniversal(self, tag_in);
2927 if (!tag)
2928 return; /* parser will look for errors */
2929
2930 /* attributes */
2931 if (attrib_in[0]) {
2932 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002933 if (!attrib) {
2934 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002936 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002937 while (attrib_in[0] && attrib_in[1]) {
2938 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002939 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 if (!key || !value) {
2941 Py_XDECREF(value);
2942 Py_XDECREF(key);
2943 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002944 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 return;
2946 }
2947 ok = PyDict_SetItem(attrib, key, value);
2948 Py_DECREF(value);
2949 Py_DECREF(key);
2950 if (ok < 0) {
2951 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002952 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002953 return;
2954 }
2955 attrib_in += 2;
2956 }
2957 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002958 Py_INCREF(Py_None);
2959 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002960 }
2961
2962 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002963 /* shortcut */
2964 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2965 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002966 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002967 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002968 if (attrib == Py_None) {
2969 Py_DECREF(attrib);
2970 attrib = PyDict_New();
2971 if (!attrib) {
2972 Py_DECREF(tag);
2973 return;
2974 }
2975 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002977 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 res = NULL;
2979
2980 Py_DECREF(tag);
2981 Py_DECREF(attrib);
2982
2983 Py_XDECREF(res);
2984}
2985
2986static void
2987expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2988 int data_len)
2989{
2990 PyObject* data;
2991 PyObject* res;
2992
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002993 if (PyErr_Occurred())
2994 return;
2995
Neal Norwitz0269b912007-08-08 06:56:02 +00002996 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002997 if (!data)
2998 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999
3000 if (TreeBuilder_CheckExact(self->target))
3001 /* shortcut */
3002 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3003 else if (self->handle_data)
3004 res = PyObject_CallFunction(self->handle_data, "O", data);
3005 else
3006 res = NULL;
3007
3008 Py_DECREF(data);
3009
3010 Py_XDECREF(res);
3011}
3012
3013static void
3014expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3015{
3016 PyObject* tag;
3017 PyObject* res = NULL;
3018
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003019 if (PyErr_Occurred())
3020 return;
3021
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 if (TreeBuilder_CheckExact(self->target))
3023 /* shortcut */
3024 /* the standard tree builder doesn't look at the end tag */
3025 res = treebuilder_handle_end(
3026 (TreeBuilderObject*) self->target, Py_None
3027 );
3028 else if (self->handle_end) {
3029 tag = makeuniversal(self, tag_in);
3030 if (tag) {
3031 res = PyObject_CallFunction(self->handle_end, "O", tag);
3032 Py_DECREF(tag);
3033 }
3034 }
3035
3036 Py_XDECREF(res);
3037}
3038
3039static void
3040expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3041 const XML_Char *uri)
3042{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003043 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3044 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003045
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003046 if (PyErr_Occurred())
3047 return;
3048
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003049 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003050 return;
3051
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003052 if (!uri)
3053 uri = "";
3054 if (!prefix)
3055 prefix = "";
3056
3057 parcel = Py_BuildValue("ss", prefix, uri);
3058 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003059 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003060 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3061 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062}
3063
3064static void
3065expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3066{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003067 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3068
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003069 if (PyErr_Occurred())
3070 return;
3071
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003072 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003073 return;
3074
3075 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076}
3077
3078static void
3079expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3080{
3081 PyObject* comment;
3082 PyObject* res;
3083
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003084 if (PyErr_Occurred())
3085 return;
3086
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003088 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089 if (comment) {
3090 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3091 Py_XDECREF(res);
3092 Py_DECREF(comment);
3093 }
3094 }
3095}
3096
Eli Bendersky45839902013-01-13 05:14:47 -08003097static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003098expat_start_doctype_handler(XMLParserObject *self,
3099 const XML_Char *doctype_name,
3100 const XML_Char *sysid,
3101 const XML_Char *pubid,
3102 int has_internal_subset)
3103{
3104 PyObject *self_pyobj = (PyObject *)self;
3105 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3106 PyObject *parser_doctype = NULL;
3107 PyObject *res = NULL;
3108
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003109 if (PyErr_Occurred())
3110 return;
3111
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003112 doctype_name_obj = makeuniversal(self, doctype_name);
3113 if (!doctype_name_obj)
3114 return;
3115
3116 if (sysid) {
3117 sysid_obj = makeuniversal(self, sysid);
3118 if (!sysid_obj) {
3119 Py_DECREF(doctype_name_obj);
3120 return;
3121 }
3122 } else {
3123 Py_INCREF(Py_None);
3124 sysid_obj = Py_None;
3125 }
3126
3127 if (pubid) {
3128 pubid_obj = makeuniversal(self, pubid);
3129 if (!pubid_obj) {
3130 Py_DECREF(doctype_name_obj);
3131 Py_DECREF(sysid_obj);
3132 return;
3133 }
3134 } else {
3135 Py_INCREF(Py_None);
3136 pubid_obj = Py_None;
3137 }
3138
3139 /* If the target has a handler for doctype, call it. */
3140 if (self->handle_doctype) {
3141 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3142 doctype_name_obj, pubid_obj, sysid_obj);
3143 Py_CLEAR(res);
3144 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003145 else {
3146 /* Now see if the parser itself has a doctype method. If yes and it's
3147 * a custom method, call it but warn about deprecation. If it's only
3148 * the vanilla XMLParser method, do nothing.
3149 */
3150 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3151 if (parser_doctype &&
3152 !(PyCFunction_Check(parser_doctype) &&
3153 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3154 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003155 (PyCFunction) _elementtree_XMLParser_doctype)) {
3156 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3157 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003158 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003159 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003160 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003161 res = PyObject_CallFunction(parser_doctype, "OOO",
3162 doctype_name_obj, pubid_obj, sysid_obj);
3163 Py_CLEAR(res);
3164 }
3165 }
3166
3167clear:
3168 Py_XDECREF(parser_doctype);
3169 Py_DECREF(doctype_name_obj);
3170 Py_DECREF(pubid_obj);
3171 Py_DECREF(sysid_obj);
3172}
3173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174static void
3175expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3176 const XML_Char* data_in)
3177{
3178 PyObject* target;
3179 PyObject* data;
3180 PyObject* res;
3181
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003182 if (PyErr_Occurred())
3183 return;
3184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003186 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3187 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188 if (target && data) {
3189 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3190 Py_XDECREF(res);
3191 Py_DECREF(data);
3192 Py_DECREF(target);
3193 } else {
3194 Py_XDECREF(data);
3195 Py_XDECREF(target);
3196 }
3197 }
3198}
3199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201
Eli Bendersky52467b12012-06-01 07:13:08 +03003202static PyObject *
3203xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204{
Eli Bendersky52467b12012-06-01 07:13:08 +03003205 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3206 if (self) {
3207 self->parser = NULL;
3208 self->target = self->entity = self->names = NULL;
3209 self->handle_start = self->handle_data = self->handle_end = NULL;
3210 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003211 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003213 return (PyObject *)self;
3214}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215
Serhiy Storchakacb985562015-05-04 15:32:48 +03003216/*[clinic input]
3217_elementtree.XMLParser.__init__
3218
3219 html: object = NULL
3220 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003221 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003222
3223[clinic start generated code]*/
3224
Eli Bendersky52467b12012-06-01 07:13:08 +03003225static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003226_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3227 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003228/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003229{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003230 self->entity = PyDict_New();
3231 if (!self->entity)
3232 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233
Serhiy Storchakacb985562015-05-04 15:32:48 +03003234 self->names = PyDict_New();
3235 if (!self->names) {
3236 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003237 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003239
Serhiy Storchakacb985562015-05-04 15:32:48 +03003240 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3241 if (!self->parser) {
3242 Py_CLEAR(self->entity);
3243 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003245 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246 }
3247
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 if (target) {
3249 Py_INCREF(target);
3250 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003251 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003253 Py_CLEAR(self->entity);
3254 Py_CLEAR(self->names);
3255 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003256 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003258 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003259 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260
Serhiy Storchakacb985562015-05-04 15:32:48 +03003261 self->handle_start = PyObject_GetAttrString(target, "start");
3262 self->handle_data = PyObject_GetAttrString(target, "data");
3263 self->handle_end = PyObject_GetAttrString(target, "end");
3264 self->handle_comment = PyObject_GetAttrString(target, "comment");
3265 self->handle_pi = PyObject_GetAttrString(target, "pi");
3266 self->handle_close = PyObject_GetAttrString(target, "close");
3267 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268
3269 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003270
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003272 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003274 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275 (XML_StartElementHandler) expat_start_handler,
3276 (XML_EndElementHandler) expat_end_handler
3277 );
3278 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 (XML_DefaultHandler) expat_default_handler
3281 );
3282 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003283 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 (XML_CharacterDataHandler) expat_data_handler
3285 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003286 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 (XML_CommentHandler) expat_comment_handler
3290 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 (XML_ProcessingInstructionHandler) expat_pi_handler
3295 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003296 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003298 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3299 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003302 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304
Eli Bendersky52467b12012-06-01 07:13:08 +03003305 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306}
3307
Eli Bendersky52467b12012-06-01 07:13:08 +03003308static int
3309xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3310{
3311 Py_VISIT(self->handle_close);
3312 Py_VISIT(self->handle_pi);
3313 Py_VISIT(self->handle_comment);
3314 Py_VISIT(self->handle_end);
3315 Py_VISIT(self->handle_data);
3316 Py_VISIT(self->handle_start);
3317
3318 Py_VISIT(self->target);
3319 Py_VISIT(self->entity);
3320 Py_VISIT(self->names);
3321
3322 return 0;
3323}
3324
3325static int
3326xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327{
3328 EXPAT(ParserFree)(self->parser);
3329
Antoine Pitrouc1948842012-10-01 23:40:37 +02003330 Py_CLEAR(self->handle_close);
3331 Py_CLEAR(self->handle_pi);
3332 Py_CLEAR(self->handle_comment);
3333 Py_CLEAR(self->handle_end);
3334 Py_CLEAR(self->handle_data);
3335 Py_CLEAR(self->handle_start);
3336 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337
Antoine Pitrouc1948842012-10-01 23:40:37 +02003338 Py_CLEAR(self->target);
3339 Py_CLEAR(self->entity);
3340 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003341
Eli Bendersky52467b12012-06-01 07:13:08 +03003342 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343}
3344
Eli Bendersky52467b12012-06-01 07:13:08 +03003345static void
3346xmlparser_dealloc(XMLParserObject* self)
3347{
3348 PyObject_GC_UnTrack(self);
3349 xmlparser_gc_clear(self);
3350 Py_TYPE(self)->tp_free((PyObject *)self);
3351}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352
3353LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003354expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003355{
3356 int ok;
3357
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003358 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3360
3361 if (PyErr_Occurred())
3362 return NULL;
3363
3364 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003365 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003366 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003368 EXPAT(GetErrorColumnNumber)(self->parser),
3369 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 );
3371 return NULL;
3372 }
3373
3374 Py_RETURN_NONE;
3375}
3376
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377/*[clinic input]
3378_elementtree.XMLParser.close
3379
3380[clinic start generated code]*/
3381
3382static PyObject *
3383_elementtree_XMLParser_close_impl(XMLParserObject *self)
3384/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385{
3386 /* end feeding data to parser */
3387
3388 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003390 if (!res)
3391 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003393 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 Py_DECREF(res);
3395 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003396 }
3397 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003398 Py_DECREF(res);
3399 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003400 }
3401 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003402 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003403 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404}
3405
Serhiy Storchakacb985562015-05-04 15:32:48 +03003406/*[clinic input]
3407_elementtree.XMLParser.feed
3408
3409 data: object
3410 /
3411
3412[clinic start generated code]*/
3413
3414static PyObject *
3415_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3416/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003417{
3418 /* feed data to parser */
3419
Serhiy Storchakacb985562015-05-04 15:32:48 +03003420 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003421 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003422 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3423 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003424 return NULL;
3425 if (data_len > INT_MAX) {
3426 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3427 return NULL;
3428 }
3429 /* Explicitly set UTF-8 encoding. Return code ignored. */
3430 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003431 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003432 }
3433 else {
3434 Py_buffer view;
3435 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003436 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003437 return NULL;
3438 if (view.len > INT_MAX) {
3439 PyBuffer_Release(&view);
3440 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3441 return NULL;
3442 }
3443 res = expat_parse(self, view.buf, (int)view.len, 0);
3444 PyBuffer_Release(&view);
3445 return res;
3446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003447}
3448
Serhiy Storchakacb985562015-05-04 15:32:48 +03003449/*[clinic input]
3450_elementtree.XMLParser._parse_whole
3451
3452 file: object
3453 /
3454
3455[clinic start generated code]*/
3456
3457static PyObject *
3458_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3459/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460{
Eli Benderskya3699232013-05-19 18:47:23 -07003461 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003462 PyObject* reader;
3463 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003464 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465 PyObject* res;
3466
Serhiy Storchakacb985562015-05-04 15:32:48 +03003467 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468 if (!reader)
3469 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003470
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471 /* read from open file object */
3472 for (;;) {
3473
3474 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3475
3476 if (!buffer) {
3477 /* read failed (e.g. due to KeyboardInterrupt) */
3478 Py_DECREF(reader);
3479 return NULL;
3480 }
3481
Eli Benderskyf996e772012-03-16 05:53:30 +02003482 if (PyUnicode_CheckExact(buffer)) {
3483 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003484 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003485 Py_DECREF(buffer);
3486 break;
3487 }
3488 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003489 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003490 if (!temp) {
3491 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003492 Py_DECREF(reader);
3493 return NULL;
3494 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003495 buffer = temp;
3496 }
3497 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 Py_DECREF(buffer);
3499 break;
3500 }
3501
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003502 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3503 Py_DECREF(buffer);
3504 Py_DECREF(reader);
3505 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3506 return NULL;
3507 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003509 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003510 );
3511
3512 Py_DECREF(buffer);
3513
3514 if (!res) {
3515 Py_DECREF(reader);
3516 return NULL;
3517 }
3518 Py_DECREF(res);
3519
3520 }
3521
3522 Py_DECREF(reader);
3523
3524 res = expat_parse(self, "", 0, 1);
3525
3526 if (res && TreeBuilder_CheckExact(self->target)) {
3527 Py_DECREF(res);
3528 return treebuilder_done((TreeBuilderObject*) self->target);
3529 }
3530
3531 return res;
3532}
3533
Serhiy Storchakacb985562015-05-04 15:32:48 +03003534/*[clinic input]
3535_elementtree.XMLParser.doctype
3536
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003537 name: object
3538 pubid: object
3539 system: object
3540 /
3541
Serhiy Storchakacb985562015-05-04 15:32:48 +03003542[clinic start generated code]*/
3543
3544static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003545_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3546 PyObject *pubid, PyObject *system)
3547/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003548{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003549 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3550 "This method of XMLParser is deprecated. Define"
3551 " doctype() method on the TreeBuilder target.",
3552 1) < 0) {
3553 return NULL;
3554 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003555 Py_RETURN_NONE;
3556}
3557
Serhiy Storchakacb985562015-05-04 15:32:48 +03003558/*[clinic input]
3559_elementtree.XMLParser._setevents
3560
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003561 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003562 events_to_report: object = None
3563 /
3564
3565[clinic start generated code]*/
3566
3567static PyObject *
3568_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3569 PyObject *events_queue,
3570 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003571/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003572{
3573 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003574 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003575 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003576 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577
3578 if (!TreeBuilder_CheckExact(self->target)) {
3579 PyErr_SetString(
3580 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003581 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582 "targets"
3583 );
3584 return NULL;
3585 }
3586
3587 target = (TreeBuilderObject*) self->target;
3588
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003589 events_append = PyObject_GetAttrString(events_queue, "append");
3590 if (events_append == NULL)
3591 return NULL;
Serhiy Storchakaea8c4312015-12-24 11:53:16 +02003592 Py_SETREF(target->events_append, events_append);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593
3594 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003595 Py_CLEAR(target->start_event_obj);
3596 Py_CLEAR(target->end_event_obj);
3597 Py_CLEAR(target->start_ns_event_obj);
3598 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003600 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003601 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003602 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003603 Py_RETURN_NONE;
3604 }
3605
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003606 if (!(events_seq = PySequence_Fast(events_to_report,
3607 "events must be a sequence"))) {
3608 return NULL;
3609 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003611 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003612 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3613 char *event_name = NULL;
3614 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003615 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003616 } else if (PyBytes_Check(event_name_obj)) {
3617 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003618 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003619 if (event_name == NULL) {
3620 Py_DECREF(events_seq);
3621 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3622 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003623 }
3624
3625 Py_INCREF(event_name_obj);
3626 if (strcmp(event_name, "start") == 0) {
3627 Py_SETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003628 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003629 Py_SETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003630 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003631 Py_SETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003632 EXPAT(SetNamespaceDeclHandler)(
3633 self->parser,
3634 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3635 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3636 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003637 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003638 Py_SETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003639 EXPAT(SetNamespaceDeclHandler)(
3640 self->parser,
3641 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3642 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3643 );
3644 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003645 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003646 Py_DECREF(events_seq);
3647 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 return NULL;
3649 }
3650 }
3651
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003652 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003654}
3655
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003656static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003657xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003659 if (PyUnicode_Check(nameobj)) {
3660 PyObject* res;
3661 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3662 res = self->entity;
3663 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3664 res = self->target;
3665 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3666 return PyUnicode_FromFormat(
3667 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003669 }
3670 else
3671 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003672
Alexander Belopolskye239d232010-12-08 23:31:48 +00003673 Py_INCREF(res);
3674 return res;
3675 }
3676 generic:
3677 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678}
3679
Serhiy Storchakacb985562015-05-04 15:32:48 +03003680#include "clinic/_elementtree.c.h"
3681
3682static PyMethodDef element_methods[] = {
3683
3684 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3685
3686 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3687 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3688
3689 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3690 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3691 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3692
3693 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3694 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3695 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3696 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3697
3698 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3699 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3700 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3701
3702 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3703 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3704
3705 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3706 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3707
3708 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3709
3710 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3711 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3712 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3713 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3714 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3715
3716 {NULL, NULL}
3717};
3718
3719static PyMappingMethods element_as_mapping = {
3720 (lenfunc) element_length,
3721 (binaryfunc) element_subscr,
3722 (objobjargproc) element_ass_subscr,
3723};
3724
Serhiy Storchakadde08152015-11-25 15:28:13 +02003725static PyGetSetDef element_getsetlist[] = {
3726 {"tag",
3727 (getter)element_tag_getter,
3728 (setter)element_tag_setter,
3729 "A string identifying what kind of data this element represents"},
3730 {"text",
3731 (getter)element_text_getter,
3732 (setter)element_text_setter,
3733 "A string of text directly after the start tag, or None"},
3734 {"tail",
3735 (getter)element_tail_getter,
3736 (setter)element_tail_setter,
3737 "A string of text directly after the end tag, or None"},
3738 {"attrib",
3739 (getter)element_attrib_getter,
3740 (setter)element_attrib_setter,
3741 "A dictionary containing the element's attributes"},
3742 {NULL},
3743};
3744
Serhiy Storchakacb985562015-05-04 15:32:48 +03003745static PyTypeObject Element_Type = {
3746 PyVarObject_HEAD_INIT(NULL, 0)
3747 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3748 /* methods */
3749 (destructor)element_dealloc, /* tp_dealloc */
3750 0, /* tp_print */
3751 0, /* tp_getattr */
3752 0, /* tp_setattr */
3753 0, /* tp_reserved */
3754 (reprfunc)element_repr, /* tp_repr */
3755 0, /* tp_as_number */
3756 &element_as_sequence, /* tp_as_sequence */
3757 &element_as_mapping, /* tp_as_mapping */
3758 0, /* tp_hash */
3759 0, /* tp_call */
3760 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003761 PyObject_GenericGetAttr, /* tp_getattro */
3762 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003763 0, /* tp_as_buffer */
3764 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3765 /* tp_flags */
3766 0, /* tp_doc */
3767 (traverseproc)element_gc_traverse, /* tp_traverse */
3768 (inquiry)element_gc_clear, /* tp_clear */
3769 0, /* tp_richcompare */
3770 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3771 0, /* tp_iter */
3772 0, /* tp_iternext */
3773 element_methods, /* tp_methods */
3774 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003775 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003776 0, /* tp_base */
3777 0, /* tp_dict */
3778 0, /* tp_descr_get */
3779 0, /* tp_descr_set */
3780 0, /* tp_dictoffset */
3781 (initproc)element_init, /* tp_init */
3782 PyType_GenericAlloc, /* tp_alloc */
3783 element_new, /* tp_new */
3784 0, /* tp_free */
3785};
3786
3787static PyMethodDef treebuilder_methods[] = {
3788 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3789 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3790 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3791 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3792 {NULL, NULL}
3793};
3794
3795static PyTypeObject TreeBuilder_Type = {
3796 PyVarObject_HEAD_INIT(NULL, 0)
3797 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3798 /* methods */
3799 (destructor)treebuilder_dealloc, /* tp_dealloc */
3800 0, /* tp_print */
3801 0, /* tp_getattr */
3802 0, /* tp_setattr */
3803 0, /* tp_reserved */
3804 0, /* tp_repr */
3805 0, /* tp_as_number */
3806 0, /* tp_as_sequence */
3807 0, /* tp_as_mapping */
3808 0, /* tp_hash */
3809 0, /* tp_call */
3810 0, /* tp_str */
3811 0, /* tp_getattro */
3812 0, /* tp_setattro */
3813 0, /* tp_as_buffer */
3814 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3815 /* tp_flags */
3816 0, /* tp_doc */
3817 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3818 (inquiry)treebuilder_gc_clear, /* tp_clear */
3819 0, /* tp_richcompare */
3820 0, /* tp_weaklistoffset */
3821 0, /* tp_iter */
3822 0, /* tp_iternext */
3823 treebuilder_methods, /* tp_methods */
3824 0, /* tp_members */
3825 0, /* tp_getset */
3826 0, /* tp_base */
3827 0, /* tp_dict */
3828 0, /* tp_descr_get */
3829 0, /* tp_descr_set */
3830 0, /* tp_dictoffset */
3831 _elementtree_TreeBuilder___init__, /* tp_init */
3832 PyType_GenericAlloc, /* tp_alloc */
3833 treebuilder_new, /* tp_new */
3834 0, /* tp_free */
3835};
3836
3837static PyMethodDef xmlparser_methods[] = {
3838 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3839 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3840 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3841 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3842 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3843 {NULL, NULL}
3844};
3845
Neal Norwitz227b5332006-03-22 09:28:35 +00003846static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003847 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003848 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003849 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003850 (destructor)xmlparser_dealloc, /* tp_dealloc */
3851 0, /* tp_print */
3852 0, /* tp_getattr */
3853 0, /* tp_setattr */
3854 0, /* tp_reserved */
3855 0, /* tp_repr */
3856 0, /* tp_as_number */
3857 0, /* tp_as_sequence */
3858 0, /* tp_as_mapping */
3859 0, /* tp_hash */
3860 0, /* tp_call */
3861 0, /* tp_str */
3862 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3863 0, /* tp_setattro */
3864 0, /* tp_as_buffer */
3865 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3866 /* tp_flags */
3867 0, /* tp_doc */
3868 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3869 (inquiry)xmlparser_gc_clear, /* tp_clear */
3870 0, /* tp_richcompare */
3871 0, /* tp_weaklistoffset */
3872 0, /* tp_iter */
3873 0, /* tp_iternext */
3874 xmlparser_methods, /* tp_methods */
3875 0, /* tp_members */
3876 0, /* tp_getset */
3877 0, /* tp_base */
3878 0, /* tp_dict */
3879 0, /* tp_descr_get */
3880 0, /* tp_descr_set */
3881 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003882 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003883 PyType_GenericAlloc, /* tp_alloc */
3884 xmlparser_new, /* tp_new */
3885 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003886};
3887
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003888/* ==================================================================== */
3889/* python module interface */
3890
3891static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003892 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003893 {NULL, NULL}
3894};
3895
Martin v. Löwis1a214512008-06-11 05:26:20 +00003896
Eli Bendersky532d03e2013-08-10 08:00:39 -07003897static struct PyModuleDef elementtreemodule = {
3898 PyModuleDef_HEAD_INIT,
3899 "_elementtree",
3900 NULL,
3901 sizeof(elementtreestate),
3902 _functions,
3903 NULL,
3904 elementtree_traverse,
3905 elementtree_clear,
3906 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003907};
3908
Neal Norwitzf6657e62006-12-28 04:47:50 +00003909PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003910PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003911{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003912 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003913 elementtreestate *st;
3914
3915 m = PyState_FindModule(&elementtreemodule);
3916 if (m) {
3917 Py_INCREF(m);
3918 return m;
3919 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003920
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003921 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003922 if (PyType_Ready(&ElementIter_Type) < 0)
3923 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003924 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003925 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003926 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003927 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003928 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003929 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003930
Eli Bendersky532d03e2013-08-10 08:00:39 -07003931 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003932 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003933 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003934 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003935
Eli Bendersky828efde2012-04-05 05:40:58 +03003936 if (!(temp = PyImport_ImportModule("copy")))
3937 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003938 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003939 Py_XDECREF(temp);
3940
Eli Bendersky532d03e2013-08-10 08:00:39 -07003941 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003942 return NULL;
3943
Eli Bendersky20d41742012-06-01 09:48:37 +03003944 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003945 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3946 if (expat_capi) {
3947 /* check that it's usable */
3948 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003949 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003950 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3951 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003952 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003953 PyErr_SetString(PyExc_ImportError,
3954 "pyexpat version is incompatible");
3955 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003956 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003957 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003958 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003959 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003960
Eli Bendersky532d03e2013-08-10 08:00:39 -07003961 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003962 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003963 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003964 Py_INCREF(st->parseerror_obj);
3965 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003966
Eli Bendersky092af1f2012-03-04 07:14:03 +02003967 Py_INCREF((PyObject *)&Element_Type);
3968 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3969
Eli Bendersky58d548d2012-05-29 15:45:16 +03003970 Py_INCREF((PyObject *)&TreeBuilder_Type);
3971 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3972
Eli Bendersky52467b12012-06-01 07:13:08 +03003973 Py_INCREF((PyObject *)&XMLParser_Type);
3974 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003975
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003976 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003977}