blob: 5908c725e13928289175d36f1e778b1eae70b7e3 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
38#if 0
39static int memory = 0;
40#define ALLOC(size, comment)\
41do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
42#define RELEASE(size, comment)\
43do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
44#else
45#define ALLOC(size, comment)
46#define RELEASE(size, comment)
47#endif
48
49/* compiler tweaks */
50#if defined(_MSC_VER)
51#define LOCAL(type) static __inline type __fastcall
52#else
53#define LOCAL(type) static type
54#endif
55
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000056/* macros used to store 'join' flags in string object pointers. note
57 that all use of text and tail as object pointers must be wrapped in
58 JOIN_OBJ. see comments in the ElementObject definition for more
59 info. */
60#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
61#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Antoine Pitrouca8aa4a2012-09-20 20:56:47 +020062#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000132list_join(PyObject* list)
133{
134 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136 PyObject* result;
137
Antoine Pitrouc1948842012-10-01 23:40:37 +0200138 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000139 if (!joiner)
140 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200141 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000142 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200143 if (result)
144 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000145 return result;
146}
147
Eli Bendersky48d358b2012-05-30 17:57:50 +0300148/* Is the given object an empty dictionary?
149*/
150static int
151is_empty_dict(PyObject *obj)
152{
153 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
154}
155
156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200158/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159
160typedef struct {
161
162 /* attributes (a dictionary object), or None if no attributes */
163 PyObject* attrib;
164
165 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200166 Py_ssize_t length; /* actual number of items */
167 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000168
169 /* this either points to _children or to a malloced buffer */
170 PyObject* *children;
171
172 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000174} ElementObjectExtra;
175
176typedef struct {
177 PyObject_HEAD
178
179 /* element tag (a string). */
180 PyObject* tag;
181
182 /* text before first child. note that this is a tagged pointer;
183 use JOIN_OBJ to get the object pointer. the join flag is used
184 to distinguish lists created by the tree builder from lists
185 assigned to the attribute by application code; the former
186 should be joined before being returned to the user, the latter
187 should be left intact. */
188 PyObject* text;
189
190 /* text after this element, in parent. note that this is a tagged
191 pointer; use JOIN_OBJ to get the object pointer. */
192 PyObject* tail;
193
194 ElementObjectExtra* extra;
195
Eli Benderskyebf37a22012-04-03 22:02:37 +0300196 PyObject *weakreflist; /* For tp_weaklistoffset */
197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObject;
199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000200
Christian Heimes90aa7642007-12-19 02:45:37 +0000201#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000202
203/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200204/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000205
206LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200207create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000208{
209 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200210 if (!self->extra) {
211 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000212 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200213 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000214
215 if (!attrib)
216 attrib = Py_None;
217
218 Py_INCREF(attrib);
219 self->extra->attrib = attrib;
220
221 self->extra->length = 0;
222 self->extra->allocated = STATIC_CHILDREN;
223 self->extra->children = self->extra->_children;
224
225 return 0;
226}
227
228LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
Eli Bendersky08b85292012-04-04 15:55:07 +0300231 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200232 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300233
Eli Benderskyebf37a22012-04-03 22:02:37 +0300234 if (!self->extra)
235 return;
236
237 /* Avoid DECREFs calling into this code again (cycles, etc.)
238 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300239 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300240 self->extra = NULL;
241
242 Py_DECREF(myextra->attrib);
243
Eli Benderskyebf37a22012-04-03 22:02:37 +0300244 for (i = 0; i < myextra->length; i++)
245 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000246
Eli Benderskyebf37a22012-04-03 22:02:37 +0300247 if (myextra->children != myextra->_children)
248 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000249
Eli Benderskyebf37a22012-04-03 22:02:37 +0300250 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000251}
252
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253/* Convenience internal function to create new Element objects with the given
254 * tag and attributes.
255*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000256LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200257create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000258{
259 ElementObject* self;
260
Eli Bendersky0192ba32012-03-30 16:38:33 +0300261 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000262 if (self == NULL)
263 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000264 self->extra = NULL;
265
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000266 Py_INCREF(tag);
267 self->tag = tag;
268
269 Py_INCREF(Py_None);
270 self->text = Py_None;
271
272 Py_INCREF(Py_None);
273 self->tail = Py_None;
274
Eli Benderskyebf37a22012-04-03 22:02:37 +0300275 self->weakreflist = NULL;
276
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200277 ALLOC(sizeof(ElementObject), "create element");
278 PyObject_GC_Track(self);
279
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200280 if (attrib != Py_None && !is_empty_dict(attrib)) {
281 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200282 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200283 return NULL;
284 }
285 }
286
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000287 return (PyObject*) self;
288}
289
Eli Bendersky092af1f2012-03-04 07:14:03 +0200290static PyObject *
291element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
292{
293 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
294 if (e != NULL) {
295 Py_INCREF(Py_None);
296 e->tag = Py_None;
297
298 Py_INCREF(Py_None);
299 e->text = Py_None;
300
301 Py_INCREF(Py_None);
302 e->tail = Py_None;
303
304 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300305 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200306 }
307 return (PyObject *)e;
308}
309
Eli Bendersky737b1732012-05-29 06:02:56 +0300310/* Helper function for extracting the attrib dictionary from a keywords dict.
311 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800312 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300313 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700314 *
315 * Return a dictionary with the content of kwds merged into the content of
316 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300317 */
318static PyObject*
319get_attrib_from_keywords(PyObject *kwds)
320{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700321 PyObject *attrib_str = PyUnicode_FromString("attrib");
322 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300323
324 if (attrib) {
325 /* If attrib was found in kwds, copy its value and remove it from
326 * kwds
327 */
328 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700329 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300330 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
331 Py_TYPE(attrib)->tp_name);
332 return NULL;
333 }
334 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700335 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300336 } else {
337 attrib = PyDict_New();
338 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700339
340 Py_DECREF(attrib_str);
341
342 /* attrib can be NULL if PyDict_New failed */
343 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200344 if (PyDict_Update(attrib, kwds) < 0)
345 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300346 return attrib;
347}
348
Serhiy Storchakacb985562015-05-04 15:32:48 +0300349/*[clinic input]
350module _elementtree
351class _elementtree.Element "ElementObject *" "&Element_Type"
352class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
353class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
354[clinic start generated code]*/
355/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
356
Eli Bendersky092af1f2012-03-04 07:14:03 +0200357static int
358element_init(PyObject *self, PyObject *args, PyObject *kwds)
359{
360 PyObject *tag;
361 PyObject *tmp;
362 PyObject *attrib = NULL;
363 ElementObject *self_elem;
364
365 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
366 return -1;
367
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 if (attrib) {
369 /* attrib passed as positional arg */
370 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371 if (!attrib)
372 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300373 if (kwds) {
374 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200375 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300376 return -1;
377 }
378 }
379 } else if (kwds) {
380 /* have keywords args */
381 attrib = get_attrib_from_keywords(kwds);
382 if (!attrib)
383 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200384 }
385
386 self_elem = (ElementObject *)self;
387
Antoine Pitrouc1948842012-10-01 23:40:37 +0200388 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200389 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200390 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200391 return -1;
392 }
393 }
394
Eli Bendersky48d358b2012-05-30 17:57:50 +0300395 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200396 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200397
398 /* Replace the objects already pointed to by tag, text and tail. */
399 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200400 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200401 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200402 Py_DECREF(tmp);
403
404 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200406 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200407 Py_DECREF(JOIN_OBJ(tmp));
408
409 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200410 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200411 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200412 Py_DECREF(JOIN_OBJ(tmp));
413
414 return 0;
415}
416
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000417LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200418element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000419{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200420 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000421 PyObject* *children;
422
423 /* make sure self->children can hold the given number of extra
424 elements. set an exception and return -1 if allocation failed */
425
Victor Stinner5f0af232013-07-11 23:01:36 +0200426 if (!self->extra) {
427 if (create_extra(self, NULL) < 0)
428 return -1;
429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000430
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200431 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000432
433 if (size > self->extra->allocated) {
434 /* use Python 2.4's list growth strategy */
435 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000436 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100437 * which needs at least 4 bytes.
438 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000439 * be safe.
440 */
441 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
443 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000445 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100446 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000447 * false alarm always assume at least one child to be safe.
448 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000449 children = PyObject_Realloc(self->extra->children,
450 size * sizeof(PyObject*));
451 if (!children)
452 goto nomemory;
453 } else {
454 children = PyObject_Malloc(size * sizeof(PyObject*));
455 if (!children)
456 goto nomemory;
457 /* copy existing children from static area to malloc buffer */
458 memcpy(children, self->extra->children,
459 self->extra->length * sizeof(PyObject*));
460 }
461 self->extra->children = children;
462 self->extra->allocated = size;
463 }
464
465 return 0;
466
467 nomemory:
468 PyErr_NoMemory();
469 return -1;
470}
471
472LOCAL(int)
473element_add_subelement(ElementObject* self, PyObject* element)
474{
475 /* add a child element to a parent */
476
477 if (element_resize(self, 1) < 0)
478 return -1;
479
480 Py_INCREF(element);
481 self->extra->children[self->extra->length] = element;
482
483 self->extra->length++;
484
485 return 0;
486}
487
488LOCAL(PyObject*)
489element_get_attrib(ElementObject* self)
490{
491 /* return borrowed reference to attrib dictionary */
492 /* note: this function assumes that the extra section exists */
493
494 PyObject* res = self->extra->attrib;
495
496 if (res == Py_None) {
497 /* create missing dictionary */
498 res = PyDict_New();
499 if (!res)
500 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200501 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000502 self->extra->attrib = res;
503 }
504
505 return res;
506}
507
508LOCAL(PyObject*)
509element_get_text(ElementObject* self)
510{
511 /* return borrowed reference to text attribute */
512
513 PyObject* res = self->text;
514
515 if (JOIN_GET(res)) {
516 res = JOIN_OBJ(res);
517 if (PyList_CheckExact(res)) {
518 res = list_join(res);
519 if (!res)
520 return NULL;
521 self->text = res;
522 }
523 }
524
525 return res;
526}
527
528LOCAL(PyObject*)
529element_get_tail(ElementObject* self)
530{
531 /* return borrowed reference to text attribute */
532
533 PyObject* res = self->tail;
534
535 if (JOIN_GET(res)) {
536 res = JOIN_OBJ(res);
537 if (PyList_CheckExact(res)) {
538 res = list_join(res);
539 if (!res)
540 return NULL;
541 self->tail = res;
542 }
543 }
544
545 return res;
546}
547
548static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300549subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000550{
551 PyObject* elem;
552
553 ElementObject* parent;
554 PyObject* tag;
555 PyObject* attrib = NULL;
556 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
557 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800558 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000559 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800560 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000561
Eli Bendersky737b1732012-05-29 06:02:56 +0300562 if (attrib) {
563 /* attrib passed as positional arg */
564 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000565 if (!attrib)
566 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300567 if (kwds) {
568 if (PyDict_Update(attrib, kwds) < 0) {
569 return NULL;
570 }
571 }
572 } else if (kwds) {
573 /* have keyword args */
574 attrib = get_attrib_from_keywords(kwds);
575 if (!attrib)
576 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000577 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300578 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000579 Py_INCREF(Py_None);
580 attrib = Py_None;
581 }
582
Eli Bendersky092af1f2012-03-04 07:14:03 +0200583 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200585 if (elem == NULL)
586 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000587
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000588 if (element_add_subelement(parent, elem) < 0) {
589 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000590 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000591 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000592
593 return elem;
594}
595
Eli Bendersky0192ba32012-03-30 16:38:33 +0300596static int
597element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
598{
599 Py_VISIT(self->tag);
600 Py_VISIT(JOIN_OBJ(self->text));
601 Py_VISIT(JOIN_OBJ(self->tail));
602
603 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200604 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300605 Py_VISIT(self->extra->attrib);
606
607 for (i = 0; i < self->extra->length; ++i)
608 Py_VISIT(self->extra->children[i]);
609 }
610 return 0;
611}
612
613static int
614element_gc_clear(ElementObject *self)
615{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300616 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700617 _clear_joined_ptr(&self->text);
618 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300619
620 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300621 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300622 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300623 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300624 return 0;
625}
626
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000627static void
628element_dealloc(ElementObject* self)
629{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300630 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300631
632 if (self->weakreflist != NULL)
633 PyObject_ClearWeakRefs((PyObject *) self);
634
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 /* element_gc_clear clears all references and deallocates extra
636 */
637 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000638
639 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200640 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000641}
642
643/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000644
Serhiy Storchakacb985562015-05-04 15:32:48 +0300645/*[clinic input]
646_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000647
Serhiy Storchakacb985562015-05-04 15:32:48 +0300648 subelement: object(subclass_of='&Element_Type')
649 /
650
651[clinic start generated code]*/
652
653static PyObject *
654_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
655/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
656{
657 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000658 return NULL;
659
660 Py_RETURN_NONE;
661}
662
Serhiy Storchakacb985562015-05-04 15:32:48 +0300663/*[clinic input]
664_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665
Serhiy Storchakacb985562015-05-04 15:32:48 +0300666[clinic start generated code]*/
667
668static PyObject *
669_elementtree_Element_clear_impl(ElementObject *self)
670/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
671{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300672 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
674 Py_INCREF(Py_None);
675 Py_DECREF(JOIN_OBJ(self->text));
676 self->text = Py_None;
677
678 Py_INCREF(Py_None);
679 Py_DECREF(JOIN_OBJ(self->tail));
680 self->tail = Py_None;
681
682 Py_RETURN_NONE;
683}
684
Serhiy Storchakacb985562015-05-04 15:32:48 +0300685/*[clinic input]
686_elementtree.Element.__copy__
687
688[clinic start generated code]*/
689
690static PyObject *
691_elementtree_Element___copy___impl(ElementObject *self)
692/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000693{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200694 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000695 ElementObject* element;
696
Eli Bendersky092af1f2012-03-04 07:14:03 +0200697 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800698 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699 if (!element)
700 return NULL;
701
702 Py_DECREF(JOIN_OBJ(element->text));
703 element->text = self->text;
704 Py_INCREF(JOIN_OBJ(element->text));
705
706 Py_DECREF(JOIN_OBJ(element->tail));
707 element->tail = self->tail;
708 Py_INCREF(JOIN_OBJ(element->tail));
709
710 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000711 if (element_resize(element, self->extra->length) < 0) {
712 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000713 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000714 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000715
716 for (i = 0; i < self->extra->length; i++) {
717 Py_INCREF(self->extra->children[i]);
718 element->extra->children[i] = self->extra->children[i];
719 }
720
721 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000722 }
723
724 return (PyObject*) element;
725}
726
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200727/* Helper for a deep copy. */
728LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
729
Serhiy Storchakacb985562015-05-04 15:32:48 +0300730/*[clinic input]
731_elementtree.Element.__deepcopy__
732
733 memo: object
734 /
735
736[clinic start generated code]*/
737
738static PyObject *
739_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
740/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200742 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000743 ElementObject* element;
744 PyObject* tag;
745 PyObject* attrib;
746 PyObject* text;
747 PyObject* tail;
748 PyObject* id;
749
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000750 tag = deepcopy(self->tag, memo);
751 if (!tag)
752 return NULL;
753
754 if (self->extra) {
755 attrib = deepcopy(self->extra->attrib, memo);
756 if (!attrib) {
757 Py_DECREF(tag);
758 return NULL;
759 }
760 } else {
761 Py_INCREF(Py_None);
762 attrib = Py_None;
763 }
764
Eli Bendersky092af1f2012-03-04 07:14:03 +0200765 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766
767 Py_DECREF(tag);
768 Py_DECREF(attrib);
769
770 if (!element)
771 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 text = deepcopy(JOIN_OBJ(self->text), memo);
774 if (!text)
775 goto error;
776 Py_DECREF(element->text);
777 element->text = JOIN_SET(text, JOIN_GET(self->text));
778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
782 Py_DECREF(element->tail);
783 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
784
785 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000786 if (element_resize(element, self->extra->length) < 0)
787 goto error;
788
789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* child = deepcopy(self->extra->children[i], memo);
791 if (!child) {
792 element->extra->length = i;
793 goto error;
794 }
795 element->extra->children[i] = child;
796 }
797
798 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200802 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Serhiy Storchaka060ed712015-12-21 12:57:27 +0200820LOCAL(PyObject *)
821deepcopy(PyObject *object, PyObject *memo)
822{
823 /* do a deep copy of the given object */
824 PyObject *args;
825 PyObject *result;
826 elementtreestate *st;
827
828 /* Fast paths */
829 if (object == Py_None || PyUnicode_CheckExact(object)) {
830 Py_INCREF(object);
831 return object;
832 }
833
834 if (Py_REFCNT(object) == 1) {
835 if (PyDict_CheckExact(object)) {
836 PyObject *key, *value;
837 Py_ssize_t pos = 0;
838 int simple = 1;
839 while (PyDict_Next(object, &pos, &key, &value)) {
840 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
841 simple = 0;
842 break;
843 }
844 }
845 if (simple)
846 return PyDict_Copy(object);
847 /* Fall through to general case */
848 }
849 else if (Element_CheckExact(object)) {
850 return _elementtree_Element___deepcopy__((ElementObject *)object, memo);
851 }
852 }
853
854 /* General case */
855 st = ET_STATE_GLOBAL;
856 if (!st->deepcopy_obj) {
857 PyErr_SetString(PyExc_RuntimeError,
858 "deepcopy helper not found");
859 return NULL;
860 }
861
862 args = PyTuple_Pack(2, object, memo);
863 if (!args)
864 return NULL;
865 result = PyObject_CallObject(st->deepcopy_obj, args);
866 Py_DECREF(args);
867 return result;
868}
869
870
Serhiy Storchakacb985562015-05-04 15:32:48 +0300871/*[clinic input]
872_elementtree.Element.__sizeof__ -> Py_ssize_t
873
874[clinic start generated code]*/
875
876static Py_ssize_t
877_elementtree_Element___sizeof___impl(ElementObject *self)
878/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200879{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200880 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200881 if (self->extra) {
882 result += sizeof(ElementObjectExtra);
883 if (self->extra->children != self->extra->_children)
884 result += sizeof(PyObject*) * self->extra->allocated;
885 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300886 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200887}
888
/* Dict keys for getstate/setstate: __getstate__ builds the state dict with
 * these keys, and the setstate helper uses the same names as keyword names
 * when unpacking that dict. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
895
896/* __getstate__ returns a fabricated instance dict as in the pure-Python
897 * Element implementation, for interoperability/interchangeability. This
898 * makes the pure-Python implementation details an API, but (a) there aren't
899 * any unnecessary structures there; and (b) it buys compatibility with 3.2
900 * pickles. See issue #16076.
901 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300902/*[clinic input]
903_elementtree.Element.__getstate__
904
905[clinic start generated code]*/
906
Eli Bendersky698bdb22013-01-10 06:01:06 -0800907static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300908_elementtree_Element___getstate___impl(ElementObject *self)
909/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800910{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200911 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 PyObject *instancedict = NULL, *children;
913
914 /* Build a list of children. */
915 children = PyList_New(self->extra ? self->extra->length : 0);
916 if (!children)
917 return NULL;
918 for (i = 0; i < PyList_GET_SIZE(children); i++) {
919 PyObject *child = self->extra->children[i];
920 Py_INCREF(child);
921 PyList_SET_ITEM(children, i, child);
922 }
923
924 /* Construct the state object. */
925 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
926 if (noattrib)
927 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
928 PICKLED_TAG, self->tag,
929 PICKLED_CHILDREN, children,
930 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700931 PICKLED_TEXT, JOIN_OBJ(self->text),
932 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800933 else
934 instancedict = Py_BuildValue("{sOsOsOsOsO}",
935 PICKLED_TAG, self->tag,
936 PICKLED_CHILDREN, children,
937 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700938 PICKLED_TEXT, JOIN_OBJ(self->text),
939 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800940 if (instancedict) {
941 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800943 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944 else {
945 for (i = 0; i < PyList_GET_SIZE(children); i++)
946 Py_DECREF(PyList_GET_ITEM(children, i));
947 Py_DECREF(children);
948
949 return NULL;
950 }
951}
952
953static PyObject *
954element_setstate_from_attributes(ElementObject *self,
955 PyObject *tag,
956 PyObject *attrib,
957 PyObject *text,
958 PyObject *tail,
959 PyObject *children)
960{
961 Py_ssize_t i, nchildren;
962
963 if (!tag) {
964 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
965 return NULL;
966 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800967
968 Py_CLEAR(self->tag);
969 self->tag = tag;
970 Py_INCREF(self->tag);
971
Eli Benderskydd3661e2013-09-13 06:24:25 -0700972 _clear_joined_ptr(&self->text);
973 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
974 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800975
Eli Benderskydd3661e2013-09-13 06:24:25 -0700976 _clear_joined_ptr(&self->tail);
977 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
978 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800979
980 /* Handle ATTRIB and CHILDREN. */
981 if (!children && !attrib)
982 Py_RETURN_NONE;
983
984 /* Compute 'nchildren'. */
985 if (children) {
986 if (!PyList_Check(children)) {
987 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
988 return NULL;
989 }
990 nchildren = PyList_Size(children);
991 }
992 else {
993 nchildren = 0;
994 }
995
996 /* Allocate 'extra'. */
997 if (element_resize(self, nchildren)) {
998 return NULL;
999 }
1000 assert(self->extra && self->extra->allocated >= nchildren);
1001
1002 /* Copy children */
1003 for (i = 0; i < nchildren; i++) {
1004 self->extra->children[i] = PyList_GET_ITEM(children, i);
1005 Py_INCREF(self->extra->children[i]);
1006 }
1007
1008 self->extra->length = nchildren;
1009 self->extra->allocated = nchildren;
1010
1011 /* Stash attrib. */
1012 if (attrib) {
1013 Py_CLEAR(self->extra->attrib);
1014 self->extra->attrib = attrib;
1015 Py_INCREF(attrib);
1016 }
1017
1018 Py_RETURN_NONE;
1019}
1020
1021/* __setstate__ for Element instance from the Python implementation.
1022 * 'state' should be the instance dict.
1023 */
Serhiy Storchakacb985562015-05-04 15:32:48 +03001024
Eli Bendersky698bdb22013-01-10 06:01:06 -08001025static PyObject *
1026element_setstate_from_Python(ElementObject *self, PyObject *state)
1027{
1028 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1029 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1030 PyObject *args;
1031 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001032 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001033
Eli Bendersky698bdb22013-01-10 06:01:06 -08001034 tag = attrib = text = tail = children = NULL;
1035 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001036 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001037 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001038
1039 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1040 &attrib, &text, &tail, &children))
1041 retval = element_setstate_from_attributes(self, tag, attrib, text,
1042 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001043 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001044 retval = NULL;
1045
1046 Py_DECREF(args);
1047 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001048}
1049
Serhiy Storchakacb985562015-05-04 15:32:48 +03001050/*[clinic input]
1051_elementtree.Element.__setstate__
1052
1053 state: object
1054 /
1055
1056[clinic start generated code]*/
1057
Eli Bendersky698bdb22013-01-10 06:01:06 -08001058static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001059_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1060/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001061{
1062 if (!PyDict_CheckExact(state)) {
1063 PyErr_Format(PyExc_TypeError,
1064 "Don't know how to unpickle \"%.200R\" as an Element",
1065 state);
1066 return NULL;
1067 }
1068 else
1069 return element_setstate_from_Python(self, state);
1070}
1071
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001072LOCAL(int)
1073checkpath(PyObject* tag)
1074{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001075 Py_ssize_t i;
1076 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001077
1078 /* check if a tag contains an xpath character */
1079
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001080#define PATHCHAR(ch) \
1081 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001082
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001083 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001084 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1085 void *data = PyUnicode_DATA(tag);
1086 unsigned int kind = PyUnicode_KIND(tag);
1087 for (i = 0; i < len; i++) {
1088 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1089 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001090 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001091 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001092 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001093 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001094 return 1;
1095 }
1096 return 0;
1097 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001098 if (PyBytes_Check(tag)) {
1099 char *p = PyBytes_AS_STRING(tag);
1100 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001101 if (p[i] == '{')
1102 check = 0;
1103 else if (p[i] == '}')
1104 check = 1;
1105 else if (check && PATHCHAR(p[i]))
1106 return 1;
1107 }
1108 return 0;
1109 }
1110
1111 return 1; /* unknown type; might be path expression */
1112}
1113
Serhiy Storchakacb985562015-05-04 15:32:48 +03001114/*[clinic input]
1115_elementtree.Element.extend
1116
1117 elements: object
1118 /
1119
1120[clinic start generated code]*/
1121
1122static PyObject *
1123_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1124/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125{
1126 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001127 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001128
Serhiy Storchakacb985562015-05-04 15:32:48 +03001129 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001130 if (!seq) {
1131 PyErr_Format(
1132 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001133 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001134 );
1135 return NULL;
1136 }
1137
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001138 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001139 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001140 Py_INCREF(element);
1141 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001142 PyErr_Format(
1143 PyExc_TypeError,
1144 "expected an Element, not \"%.200s\"",
1145 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001146 Py_DECREF(seq);
1147 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001148 return NULL;
1149 }
1150
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001151 if (element_add_subelement(self, element) < 0) {
1152 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001153 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001154 return NULL;
1155 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001156 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001157 }
1158
1159 Py_DECREF(seq);
1160
1161 Py_RETURN_NONE;
1162}
1163
Serhiy Storchakacb985562015-05-04 15:32:48 +03001164/*[clinic input]
1165_elementtree.Element.find
1166
1167 path: object
1168 namespaces: object = None
1169
1170[clinic start generated code]*/
1171
1172static PyObject *
1173_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1174 PyObject *namespaces)
1175/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001176{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001177 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001178 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001179
Serhiy Storchakacb985562015-05-04 15:32:48 +03001180 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001181 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001182 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001183 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001184 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001185 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001186
1187 if (!self->extra)
1188 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001189
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001190 for (i = 0; i < self->extra->length; i++) {
1191 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001192 int rc;
1193 if (!Element_CheckExact(item))
1194 continue;
1195 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001196 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001197 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001198 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001199 Py_DECREF(item);
1200 if (rc < 0)
1201 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001202 }
1203
1204 Py_RETURN_NONE;
1205}
1206
Serhiy Storchakacb985562015-05-04 15:32:48 +03001207/*[clinic input]
1208_elementtree.Element.findtext
1209
1210 path: object
1211 default: object = None
1212 namespaces: object = None
1213
1214[clinic start generated code]*/
1215
1216static PyObject *
1217_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1218 PyObject *default_value,
1219 PyObject *namespaces)
1220/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001222 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001223 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001224 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001225
Serhiy Storchakacb985562015-05-04 15:32:48 +03001226 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001227 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001228 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001229 );
1230
1231 if (!self->extra) {
1232 Py_INCREF(default_value);
1233 return default_value;
1234 }
1235
1236 for (i = 0; i < self->extra->length; i++) {
1237 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001238 int rc;
1239 if (!Element_CheckExact(item))
1240 continue;
1241 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001242 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001243 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001245 if (text == Py_None) {
1246 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001247 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001248 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001249 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001250 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251 return text;
1252 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001253 Py_DECREF(item);
1254 if (rc < 0)
1255 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256 }
1257
1258 Py_INCREF(default_value);
1259 return default_value;
1260}
1261
Serhiy Storchakacb985562015-05-04 15:32:48 +03001262/*[clinic input]
1263_elementtree.Element.findall
1264
1265 path: object
1266 namespaces: object = None
1267
1268[clinic start generated code]*/
1269
1270static PyObject *
1271_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1272 PyObject *namespaces)
1273/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001274{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001275 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001276 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001277 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001278 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001279
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001280 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001281 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001282 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001283 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001284 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001285 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001286
1287 out = PyList_New(0);
1288 if (!out)
1289 return NULL;
1290
1291 if (!self->extra)
1292 return out;
1293
1294 for (i = 0; i < self->extra->length; i++) {
1295 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001296 int rc;
1297 if (!Element_CheckExact(item))
1298 continue;
1299 Py_INCREF(item);
1300 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1301 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1302 Py_DECREF(item);
1303 Py_DECREF(out);
1304 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001305 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001306 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001307 }
1308
1309 return out;
1310}
1311
Serhiy Storchakacb985562015-05-04 15:32:48 +03001312/*[clinic input]
1313_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001314
Serhiy Storchakacb985562015-05-04 15:32:48 +03001315 path: object
1316 namespaces: object = None
1317
1318[clinic start generated code]*/
1319
1320static PyObject *
1321_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1322 PyObject *namespaces)
1323/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1324{
1325 PyObject* tag = path;
1326 _Py_IDENTIFIER(iterfind);
1327 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001328
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001329 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001330 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001331}
1332
Serhiy Storchakacb985562015-05-04 15:32:48 +03001333/*[clinic input]
1334_elementtree.Element.get
1335
1336 key: object
1337 default: object = None
1338
1339[clinic start generated code]*/
1340
1341static PyObject *
1342_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1343 PyObject *default_value)
1344/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001345{
1346 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001347
1348 if (!self->extra || self->extra->attrib == Py_None)
1349 value = default_value;
1350 else {
1351 value = PyDict_GetItem(self->extra->attrib, key);
1352 if (!value)
1353 value = default_value;
1354 }
1355
1356 Py_INCREF(value);
1357 return value;
1358}
1359
Serhiy Storchakacb985562015-05-04 15:32:48 +03001360/*[clinic input]
1361_elementtree.Element.getchildren
1362
1363[clinic start generated code]*/
1364
1365static PyObject *
1366_elementtree_Element_getchildren_impl(ElementObject *self)
1367/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001368{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001369 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001370 PyObject* list;
1371
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001372 /* FIXME: report as deprecated? */
1373
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001374 if (!self->extra)
1375 return PyList_New(0);
1376
1377 list = PyList_New(self->extra->length);
1378 if (!list)
1379 return NULL;
1380
1381 for (i = 0; i < self->extra->length; i++) {
1382 PyObject* item = self->extra->children[i];
1383 Py_INCREF(item);
1384 PyList_SET_ITEM(list, i, item);
1385 }
1386
1387 return list;
1388}
1389
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001390
/* Forward declaration: called by the iter() and itertext() methods below. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1393
1394
Serhiy Storchakacb985562015-05-04 15:32:48 +03001395/*[clinic input]
1396_elementtree.Element.iter
1397
1398 tag: object = None
1399
1400[clinic start generated code]*/
1401
Eli Bendersky64d11e62012-06-15 07:42:50 +03001402static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001403_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1404/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001405{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001406 if (PyUnicode_Check(tag)) {
1407 if (PyUnicode_READY(tag) < 0)
1408 return NULL;
1409 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1410 tag = Py_None;
1411 }
1412 else if (PyBytes_Check(tag)) {
1413 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1414 tag = Py_None;
1415 }
1416
Eli Bendersky64d11e62012-06-15 07:42:50 +03001417 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001418}
1419
1420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421/*[clinic input]
1422_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001423
Serhiy Storchakacb985562015-05-04 15:32:48 +03001424[clinic start generated code]*/
1425
1426static PyObject *
1427_elementtree_Element_itertext_impl(ElementObject *self)
1428/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1429{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001430 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001431}
1432
Eli Bendersky64d11e62012-06-15 07:42:50 +03001433
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001435element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001436{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001437 ElementObject* self = (ElementObject*) self_;
1438
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439 if (!self->extra || index < 0 || index >= self->extra->length) {
1440 PyErr_SetString(
1441 PyExc_IndexError,
1442 "child index out of range"
1443 );
1444 return NULL;
1445 }
1446
1447 Py_INCREF(self->extra->children[index]);
1448 return self->extra->children[index];
1449}
1450
Serhiy Storchakacb985562015-05-04 15:32:48 +03001451/*[clinic input]
1452_elementtree.Element.insert
1453
1454 index: Py_ssize_t
1455 subelement: object(subclass_of='&Element_Type')
1456 /
1457
1458[clinic start generated code]*/
1459
1460static PyObject *
1461_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1462 PyObject *subelement)
1463/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001465 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466
Victor Stinner5f0af232013-07-11 23:01:36 +02001467 if (!self->extra) {
1468 if (create_extra(self, NULL) < 0)
1469 return NULL;
1470 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001471
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001472 if (index < 0) {
1473 index += self->extra->length;
1474 if (index < 0)
1475 index = 0;
1476 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001477 if (index > self->extra->length)
1478 index = self->extra->length;
1479
1480 if (element_resize(self, 1) < 0)
1481 return NULL;
1482
1483 for (i = self->extra->length; i > index; i--)
1484 self->extra->children[i] = self->extra->children[i-1];
1485
Serhiy Storchakacb985562015-05-04 15:32:48 +03001486 Py_INCREF(subelement);
1487 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488
1489 self->extra->length++;
1490
1491 Py_RETURN_NONE;
1492}
1493
Serhiy Storchakacb985562015-05-04 15:32:48 +03001494/*[clinic input]
1495_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001496
Serhiy Storchakacb985562015-05-04 15:32:48 +03001497[clinic start generated code]*/
1498
1499static PyObject *
1500_elementtree_Element_items_impl(ElementObject *self)
1501/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1502{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001503 if (!self->extra || self->extra->attrib == Py_None)
1504 return PyList_New(0);
1505
1506 return PyDict_Items(self->extra->attrib);
1507}
1508
Serhiy Storchakacb985562015-05-04 15:32:48 +03001509/*[clinic input]
1510_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001511
Serhiy Storchakacb985562015-05-04 15:32:48 +03001512[clinic start generated code]*/
1513
1514static PyObject *
1515_elementtree_Element_keys_impl(ElementObject *self)
1516/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1517{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001518 if (!self->extra || self->extra->attrib == Py_None)
1519 return PyList_New(0);
1520
1521 return PyDict_Keys(self->extra->attrib);
1522}
1523
Martin v. Löwis18e16552006-02-15 17:27:45 +00001524static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001525element_length(ElementObject* self)
1526{
1527 if (!self->extra)
1528 return 0;
1529
1530 return self->extra->length;
1531}
1532
Serhiy Storchakacb985562015-05-04 15:32:48 +03001533/*[clinic input]
1534_elementtree.Element.makeelement
1535
1536 tag: object
1537 attrib: object
1538 /
1539
1540[clinic start generated code]*/
1541
1542static PyObject *
1543_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1544 PyObject *attrib)
1545/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546{
1547 PyObject* elem;
1548
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549 attrib = PyDict_Copy(attrib);
1550 if (!attrib)
1551 return NULL;
1552
Eli Bendersky092af1f2012-03-04 07:14:03 +02001553 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001554
1555 Py_DECREF(attrib);
1556
1557 return elem;
1558}
1559
Serhiy Storchakacb985562015-05-04 15:32:48 +03001560/*[clinic input]
1561_elementtree.Element.remove
1562
1563 subelement: object(subclass_of='&Element_Type')
1564 /
1565
1566[clinic start generated code]*/
1567
1568static PyObject *
1569_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1570/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001571{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001572 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001573 int rc;
1574 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001576 if (!self->extra) {
1577 /* element has no children, so raise exception */
1578 PyErr_SetString(
1579 PyExc_ValueError,
1580 "list.remove(x): x not in list"
1581 );
1582 return NULL;
1583 }
1584
1585 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001586 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001587 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001588 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001589 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001590 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001591 if (rc < 0)
1592 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001593 }
1594
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001595 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001596 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001597 PyErr_SetString(
1598 PyExc_ValueError,
1599 "list.remove(x): x not in list"
1600 );
1601 return NULL;
1602 }
1603
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001604 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001605
1606 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001607 for (; i < self->extra->length; i++)
1608 self->extra->children[i] = self->extra->children[i+1];
1609
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001610 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611 Py_RETURN_NONE;
1612}
1613
1614static PyObject*
1615element_repr(ElementObject* self)
1616{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001617 if (self->tag)
1618 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1619 else
1620 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001621}
1622
Serhiy Storchakacb985562015-05-04 15:32:48 +03001623/*[clinic input]
1624_elementtree.Element.set
1625
1626 key: object
1627 value: object
1628 /
1629
1630[clinic start generated code]*/
1631
1632static PyObject *
1633_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1634 PyObject *value)
1635/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001636{
1637 PyObject* attrib;
1638
Victor Stinner5f0af232013-07-11 23:01:36 +02001639 if (!self->extra) {
1640 if (create_extra(self, NULL) < 0)
1641 return NULL;
1642 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001643
1644 attrib = element_get_attrib(self);
1645 if (!attrib)
1646 return NULL;
1647
1648 if (PyDict_SetItem(attrib, key, value) < 0)
1649 return NULL;
1650
1651 Py_RETURN_NONE;
1652}
1653
1654static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001655element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001656{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001657 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001658 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001659 PyObject* old;
1660
1661 if (!self->extra || index < 0 || index >= self->extra->length) {
1662 PyErr_SetString(
1663 PyExc_IndexError,
1664 "child assignment index out of range");
1665 return -1;
1666 }
1667
1668 old = self->extra->children[index];
1669
1670 if (item) {
1671 Py_INCREF(item);
1672 self->extra->children[index] = item;
1673 } else {
1674 self->extra->length--;
1675 for (i = index; i < self->extra->length; i++)
1676 self->extra->children[i] = self->extra->children[i+1];
1677 }
1678
1679 Py_DECREF(old);
1680
1681 return 0;
1682}
1683
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001684static PyObject*
1685element_subscr(PyObject* self_, PyObject* item)
1686{
1687 ElementObject* self = (ElementObject*) self_;
1688
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001689 if (PyIndex_Check(item)) {
1690 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001691
1692 if (i == -1 && PyErr_Occurred()) {
1693 return NULL;
1694 }
1695 if (i < 0 && self->extra)
1696 i += self->extra->length;
1697 return element_getitem(self_, i);
1698 }
1699 else if (PySlice_Check(item)) {
1700 Py_ssize_t start, stop, step, slicelen, cur, i;
1701 PyObject* list;
1702
1703 if (!self->extra)
1704 return PyList_New(0);
1705
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001706 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001707 self->extra->length,
1708 &start, &stop, &step, &slicelen) < 0) {
1709 return NULL;
1710 }
1711
1712 if (slicelen <= 0)
1713 return PyList_New(0);
1714 else {
1715 list = PyList_New(slicelen);
1716 if (!list)
1717 return NULL;
1718
1719 for (cur = start, i = 0; i < slicelen;
1720 cur += step, i++) {
1721 PyObject* item = self->extra->children[cur];
1722 Py_INCREF(item);
1723 PyList_SET_ITEM(list, i, item);
1724 }
1725
1726 return list;
1727 }
1728 }
1729 else {
1730 PyErr_SetString(PyExc_TypeError,
1731 "element indices must be integers");
1732 return NULL;
1733 }
1734}
1735
1736static int
1737element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1738{
1739 ElementObject* self = (ElementObject*) self_;
1740
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001741 if (PyIndex_Check(item)) {
1742 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743
1744 if (i == -1 && PyErr_Occurred()) {
1745 return -1;
1746 }
1747 if (i < 0 && self->extra)
1748 i += self->extra->length;
1749 return element_setitem(self_, i, value);
1750 }
1751 else if (PySlice_Check(item)) {
1752 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1753
1754 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001755 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001756
Victor Stinner5f0af232013-07-11 23:01:36 +02001757 if (!self->extra) {
1758 if (create_extra(self, NULL) < 0)
1759 return -1;
1760 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001761
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001762 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001763 self->extra->length,
1764 &start, &stop, &step, &slicelen) < 0) {
1765 return -1;
1766 }
1767
Eli Bendersky865756a2012-03-09 13:38:15 +02001768 if (value == NULL) {
1769 /* Delete slice */
1770 size_t cur;
1771 Py_ssize_t i;
1772
1773 if (slicelen <= 0)
1774 return 0;
1775
1776 /* Since we're deleting, the direction of the range doesn't matter,
1777 * so for simplicity make it always ascending.
1778 */
1779 if (step < 0) {
1780 stop = start + 1;
1781 start = stop + step * (slicelen - 1) - 1;
1782 step = -step;
1783 }
1784
1785 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1786
1787 /* recycle is a list that will contain all the children
1788 * scheduled for removal.
1789 */
1790 if (!(recycle = PyList_New(slicelen))) {
1791 PyErr_NoMemory();
1792 return -1;
1793 }
1794
1795 /* This loop walks over all the children that have to be deleted,
1796 * with cur pointing at them. num_moved is the amount of children
1797 * until the next deleted child that have to be "shifted down" to
1798 * occupy the deleted's places.
1799 * Note that in the ith iteration, shifting is done i+i places down
1800 * because i children were already removed.
1801 */
1802 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1803 /* Compute how many children have to be moved, clipping at the
1804 * list end.
1805 */
1806 Py_ssize_t num_moved = step - 1;
1807 if (cur + step >= (size_t)self->extra->length) {
1808 num_moved = self->extra->length - cur - 1;
1809 }
1810
1811 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1812
1813 memmove(
1814 self->extra->children + cur - i,
1815 self->extra->children + cur + 1,
1816 num_moved * sizeof(PyObject *));
1817 }
1818
1819 /* Leftover "tail" after the last removed child */
1820 cur = start + (size_t)slicelen * step;
1821 if (cur < (size_t)self->extra->length) {
1822 memmove(
1823 self->extra->children + cur - slicelen,
1824 self->extra->children + cur,
1825 (self->extra->length - cur) * sizeof(PyObject *));
1826 }
1827
1828 self->extra->length -= slicelen;
1829
1830 /* Discard the recycle list with all the deleted sub-elements */
1831 Py_XDECREF(recycle);
1832 return 0;
1833 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001834
1835 /* A new slice is actually being assigned */
1836 seq = PySequence_Fast(value, "");
1837 if (!seq) {
1838 PyErr_Format(
1839 PyExc_TypeError,
1840 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1841 );
1842 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001843 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001844 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001845
1846 if (step != 1 && newlen != slicelen)
1847 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001848 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001849 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001850 "attempt to assign sequence of size %zd "
1851 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001852 newlen, slicelen
1853 );
1854 return -1;
1855 }
1856
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001857 /* Resize before creating the recycle bin, to prevent refleaks. */
1858 if (newlen > slicelen) {
1859 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001860 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001861 return -1;
1862 }
1863 }
1864
1865 if (slicelen > 0) {
1866 /* to avoid recursive calls to this method (via decref), move
1867 old items to the recycle bin here, and get rid of them when
1868 we're done modifying the element */
1869 recycle = PyList_New(slicelen);
1870 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001871 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001872 return -1;
1873 }
1874 for (cur = start, i = 0; i < slicelen;
1875 cur += step, i++)
1876 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1877 }
1878
1879 if (newlen < slicelen) {
1880 /* delete slice */
1881 for (i = stop; i < self->extra->length; i++)
1882 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1883 } else if (newlen > slicelen) {
1884 /* insert slice */
1885 for (i = self->extra->length-1; i >= stop; i--)
1886 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1887 }
1888
1889 /* replace the slice */
1890 for (cur = start, i = 0; i < newlen;
1891 cur += step, i++) {
1892 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1893 Py_INCREF(element);
1894 self->extra->children[cur] = element;
1895 }
1896
1897 self->extra->length += newlen - slicelen;
1898
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001899 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001900
1901 /* discard the recycle bin, and everything in it */
1902 Py_XDECREF(recycle);
1903
1904 return 0;
1905 }
1906 else {
1907 PyErr_SetString(PyExc_TypeError,
1908 "element indices must be integers");
1909 return -1;
1910 }
1911}
1912
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001913static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001914element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001915{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001916 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001917 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001918 return res;
1919}
1920
Serhiy Storchakadde08152015-11-25 15:28:13 +02001921static PyObject*
1922element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001923{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001924 PyObject *res = element_get_text(self);
1925 Py_XINCREF(res);
1926 return res;
1927}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001928
Serhiy Storchakadde08152015-11-25 15:28:13 +02001929static PyObject*
1930element_tail_getter(ElementObject *self, void *closure)
1931{
1932 PyObject *res = element_get_tail(self);
1933 Py_XINCREF(res);
1934 return res;
1935}
1936
1937static PyObject*
1938element_attrib_getter(ElementObject *self, void *closure)
1939{
1940 PyObject *res;
1941 if (!self->extra) {
1942 if (create_extra(self, NULL) < 0)
1943 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001944 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001945 res = element_get_attrib(self);
1946 Py_XINCREF(res);
1947 return res;
1948}
Victor Stinner4d463432013-07-11 23:05:03 +02001949
/* macro for setter validation: a NULL value means the attribute is being
   deleted, which is rejected with AttributeError and makes the enclosing
   setter return -1.  Wrapped in do/while(0) so the expansion is a single
   statement and cannot capture a following `else`; invoke it with a
   trailing semicolon. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1958
Serhiy Storchakadde08152015-11-25 15:28:13 +02001959static int
1960element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1961{
1962 _VALIDATE_ATTR_VALUE(value);
1963 Py_INCREF(value);
1964 Py_DECREF(self->tag);
1965 self->tag = value;
1966 return 0;
1967}
1968
1969static int
1970element_text_setter(ElementObject *self, PyObject *value, void *closure)
1971{
1972 _VALIDATE_ATTR_VALUE(value);
1973 Py_INCREF(value);
1974 Py_DECREF(JOIN_OBJ(self->text));
1975 self->text = value;
1976 return 0;
1977}
1978
1979static int
1980element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1981{
1982 _VALIDATE_ATTR_VALUE(value);
1983 Py_INCREF(value);
1984 Py_DECREF(JOIN_OBJ(self->tail));
1985 self->tail = value;
1986 return 0;
1987}
1988
1989static int
1990element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1991{
1992 _VALIDATE_ATTR_VALUE(value);
1993 if (!self->extra) {
1994 if (create_extra(self, NULL) < 0)
1995 return -1;
1996 }
1997 Py_INCREF(value);
1998 Py_DECREF(self->extra->attrib);
1999 self->extra->attrib = value;
Eli Benderskyef9683b2013-05-18 07:52:34 -07002000 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002001}
2002
2003static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002004 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002005 0, /* sq_concat */
2006 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002008 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00002009 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002010 0,
2011};
2012
Eli Bendersky64d11e62012-06-15 07:42:50 +03002013/******************************* Element iterator ****************************/
2014
2015/* ElementIterObject represents the iteration state over an XML element in
2016 * pre-order traversal. To keep track of which sub-element should be returned
2017 * next, a stack of parents is maintained. This is a standard stack-based
2018 * iterative pre-order traversal of a tree.
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002019 * The stack is managed using a continuous array.
2020 * Each stack item contains the saved parent to which we should return after
Eli Bendersky64d11e62012-06-15 07:42:50 +03002021 * the current one is exhausted, and the next child to examine in that parent.
2022 */
2023typedef struct ParentLocator_t {
2024 ElementObject *parent;
2025 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002026} ParentLocator;
2027
2028typedef struct {
2029 PyObject_HEAD
2030 ParentLocator *parent_stack;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002031 Py_ssize_t parent_stack_used;
2032 Py_ssize_t parent_stack_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002033 ElementObject *root_element;
2034 PyObject *sought_tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002035 int gettext;
2036} ElementIterObject;
2037
2038
2039static void
2040elementiter_dealloc(ElementIterObject *it)
2041{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002042 Py_ssize_t i = it->parent_stack_used;
2043 it->parent_stack_used = 0;
2044 while (i--)
2045 Py_XDECREF(it->parent_stack[i].parent);
2046 PyMem_Free(it->parent_stack);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002047
2048 Py_XDECREF(it->sought_tag);
2049 Py_XDECREF(it->root_element);
2050
2051 PyObject_GC_UnTrack(it);
2052 PyObject_GC_Del(it);
2053}
2054
2055static int
2056elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2057{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002058 Py_ssize_t i = it->parent_stack_used;
2059 while (i--)
2060 Py_VISIT(it->parent_stack[i].parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002061
2062 Py_VISIT(it->root_element);
2063 Py_VISIT(it->sought_tag);
2064 return 0;
2065}
2066
2067/* Helper function for elementiter_next. Add a new parent to the parent stack.
2068 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002069static int
2070parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
Eli Bendersky64d11e62012-06-15 07:42:50 +03002071{
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002072 ParentLocator *item;
2073
2074 if (it->parent_stack_used >= it->parent_stack_size) {
2075 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2076 ParentLocator *parent_stack = it->parent_stack;
2077 PyMem_Resize(parent_stack, ParentLocator, new_size);
2078 if (parent_stack == NULL)
2079 return -1;
2080 it->parent_stack = parent_stack;
2081 it->parent_stack_size = new_size;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002082 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002083 item = it->parent_stack + it->parent_stack_used++;
2084 Py_INCREF(parent);
2085 item->parent = parent;
2086 item->child_index = 0;
2087 return 0;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002088}
2089
2090static PyObject *
2091elementiter_next(ElementIterObject *it)
2092{
2093 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002094 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002095 * A short note on gettext: this function serves both the iter() and
2096 * itertext() methods to avoid code duplication. However, there are a few
2097 * small differences in the way these iterations work. Namely:
2098 * - itertext() only yields text from nodes that have it, and continues
2099 * iterating when a node doesn't have text (so it doesn't return any
2100 * node like iter())
2101 * - itertext() also has to handle tail, after finishing with all the
2102 * children of a node.
2103 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002104 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002105 ElementObject *elem;
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002106 PyObject *text;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002107
2108 while (1) {
2109 /* Handle the case reached in the beginning and end of iteration, where
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002110 * the parent stack is empty. If root_element is NULL and we're here, the
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111 * iterator is exhausted.
2112 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002113 if (!it->parent_stack_used) {
2114 if (!it->root_element) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002115 PyErr_SetNone(PyExc_StopIteration);
2116 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002117 }
2118
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002119 elem = it->root_element; /* steals a reference */
2120 it->root_element = NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 }
2122 else {
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002123 /* See if there are children left to traverse in the current parent. If
2124 * yes, visit the next child. If not, pop the stack and try again.
Eli Bendersky64d11e62012-06-15 07:42:50 +03002125 */
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002126 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2127 Py_ssize_t child_index = item->child_index;
2128 ElementObjectExtra *extra;
2129 elem = item->parent;
2130 extra = elem->extra;
2131 if (!extra || child_index >= extra->length) {
2132 it->parent_stack_used--;
2133 /* Note that extra condition on it->parent_stack_used here;
2134 * this is because itertext() is supposed to only return *inner*
2135 * text, not text following the element it began iteration with.
2136 */
2137 if (it->gettext && it->parent_stack_used) {
2138 text = element_get_tail(elem);
2139 goto gettext;
2140 }
2141 Py_DECREF(elem);
2142 continue;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002143 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002144
2145 elem = (ElementObject *)extra->children[child_index];
2146 item->child_index++;
2147 Py_INCREF(elem);
2148 }
2149
2150 if (parent_stack_push_new(it, elem) < 0) {
2151 Py_DECREF(elem);
2152 PyErr_NoMemory();
2153 return NULL;
2154 }
2155 if (it->gettext) {
2156 text = element_get_text(elem);
2157 goto gettext;
2158 }
2159
2160 if (it->sought_tag == Py_None)
2161 return (PyObject *)elem;
2162
2163 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2164 if (rc > 0)
2165 return (PyObject *)elem;
2166
2167 Py_DECREF(elem);
2168 if (rc < 0)
2169 return NULL;
2170 continue;
2171
2172gettext:
2173 if (!text) {
2174 Py_DECREF(elem);
2175 return NULL;
2176 }
2177 if (text == Py_None) {
2178 Py_DECREF(elem);
2179 }
2180 else {
2181 Py_INCREF(text);
2182 Py_DECREF(elem);
2183 rc = PyObject_IsTrue(text);
2184 if (rc > 0)
2185 return text;
2186 Py_DECREF(text);
2187 if (rc < 0)
2188 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002189 }
2190 }
2191
2192 return NULL;
2193}
2194
2195
2196static PyTypeObject ElementIter_Type = {
2197 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002198 /* Using the module's name since the pure-Python implementation does not
2199 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200 "_elementtree._element_iterator", /* tp_name */
2201 sizeof(ElementIterObject), /* tp_basicsize */
2202 0, /* tp_itemsize */
2203 /* methods */
2204 (destructor)elementiter_dealloc, /* tp_dealloc */
2205 0, /* tp_print */
2206 0, /* tp_getattr */
2207 0, /* tp_setattr */
2208 0, /* tp_reserved */
2209 0, /* tp_repr */
2210 0, /* tp_as_number */
2211 0, /* tp_as_sequence */
2212 0, /* tp_as_mapping */
2213 0, /* tp_hash */
2214 0, /* tp_call */
2215 0, /* tp_str */
2216 0, /* tp_getattro */
2217 0, /* tp_setattro */
2218 0, /* tp_as_buffer */
2219 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2220 0, /* tp_doc */
2221 (traverseproc)elementiter_traverse, /* tp_traverse */
2222 0, /* tp_clear */
2223 0, /* tp_richcompare */
2224 0, /* tp_weaklistoffset */
2225 PyObject_SelfIter, /* tp_iter */
2226 (iternextfunc)elementiter_next, /* tp_iternext */
2227 0, /* tp_methods */
2228 0, /* tp_members */
2229 0, /* tp_getset */
2230 0, /* tp_base */
2231 0, /* tp_dict */
2232 0, /* tp_descr_get */
2233 0, /* tp_descr_set */
2234 0, /* tp_dictoffset */
2235 0, /* tp_init */
2236 0, /* tp_alloc */
2237 0, /* tp_new */
2238};
2239
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002240#define INIT_PARENT_STACK_SIZE 8
Eli Bendersky64d11e62012-06-15 07:42:50 +03002241
2242static PyObject *
2243create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2244{
2245 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002246
2247 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2248 if (!it)
2249 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002250
Victor Stinner4d463432013-07-11 23:05:03 +02002251 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002252 it->sought_tag = tag;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002253 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002254 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002255 it->root_element = self;
2256
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002258
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002259 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002260 if (it->parent_stack == NULL) {
2261 Py_DECREF(it);
2262 PyErr_NoMemory();
2263 return NULL;
2264 }
Serhiy Storchaka22adf2a2015-12-21 12:43:54 +02002265 it->parent_stack_used = 0;
2266 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002267
Eli Bendersky64d11e62012-06-15 07:42:50 +03002268 return (PyObject *)it;
2269}
2270
2271
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272/* ==================================================================== */
2273/* the tree builder type */
2274
2275typedef struct {
2276 PyObject_HEAD
2277
Eli Bendersky58d548d2012-05-29 15:45:16 +03002278 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279
Antoine Pitrouee329312012-10-04 19:53:29 +02002280 PyObject *this; /* current node */
2281 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282
Eli Bendersky58d548d2012-05-29 15:45:16 +03002283 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002284
Eli Bendersky58d548d2012-05-29 15:45:16 +03002285 PyObject *stack; /* element stack */
2286 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288 PyObject *element_factory;
2289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002291 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002292 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2293 PyObject *end_event_obj;
2294 PyObject *start_ns_event_obj;
2295 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296} TreeBuilderObject;
2297
Christian Heimes90aa7642007-12-19 02:45:37 +00002298#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002299
2300/* -------------------------------------------------------------------- */
2301/* constructor and destructor */
2302
Eli Bendersky58d548d2012-05-29 15:45:16 +03002303static PyObject *
2304treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2307 if (t != NULL) {
2308 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309
Eli Bendersky58d548d2012-05-29 15:45:16 +03002310 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002311 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002312 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002313 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002314
Eli Bendersky58d548d2012-05-29 15:45:16 +03002315 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002316 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002317 t->stack = PyList_New(20);
2318 if (!t->stack) {
2319 Py_DECREF(t->this);
2320 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002321 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 return NULL;
2323 }
2324 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002326 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002327 t->start_event_obj = t->end_event_obj = NULL;
2328 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2329 }
2330 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002331}
2332
Serhiy Storchakacb985562015-05-04 15:32:48 +03002333/*[clinic input]
2334_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002335
Serhiy Storchakacb985562015-05-04 15:32:48 +03002336 element_factory: object = NULL
2337
2338[clinic start generated code]*/
2339
2340static int
2341_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2342 PyObject *element_factory)
2343/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2344{
2345 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002346
2347 if (element_factory) {
2348 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002349 tmp = self->element_factory;
2350 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002351 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002352 }
2353
Eli Bendersky58d548d2012-05-29 15:45:16 +03002354 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002355}
2356
Eli Bendersky48d358b2012-05-30 17:57:50 +03002357static int
2358treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2359{
2360 Py_VISIT(self->root);
2361 Py_VISIT(self->this);
2362 Py_VISIT(self->last);
2363 Py_VISIT(self->data);
2364 Py_VISIT(self->stack);
2365 Py_VISIT(self->element_factory);
2366 return 0;
2367}
2368
2369static int
2370treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002372 Py_CLEAR(self->end_ns_event_obj);
2373 Py_CLEAR(self->start_ns_event_obj);
2374 Py_CLEAR(self->end_event_obj);
2375 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002376 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002377 Py_CLEAR(self->stack);
2378 Py_CLEAR(self->data);
2379 Py_CLEAR(self->last);
2380 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002381 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002382 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002383 return 0;
2384}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002385
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386static void
2387treebuilder_dealloc(TreeBuilderObject *self)
2388{
2389 PyObject_GC_UnTrack(self);
2390 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002391 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002392}
2393
2394/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002395/* helpers for handling of arbitrary element-like objects */
2396
2397static int
2398treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2399 PyObject **dest, _Py_Identifier *name)
2400{
2401 if (Element_CheckExact(element)) {
2402 Py_DECREF(JOIN_OBJ(*dest));
2403 *dest = JOIN_SET(data, PyList_CheckExact(data));
2404 return 0;
2405 }
2406 else {
2407 PyObject *joined = list_join(data);
2408 int r;
2409 if (joined == NULL)
2410 return -1;
2411 r = _PyObject_SetAttrId(element, name, joined);
2412 Py_DECREF(joined);
2413 return r;
2414 }
2415}
2416
2417/* These two functions steal a reference to data */
2418static int
2419treebuilder_set_element_text(PyObject *element, PyObject *data)
2420{
2421 _Py_IDENTIFIER(text);
2422 return treebuilder_set_element_text_or_tail(
2423 element, data, &((ElementObject *) element)->text, &PyId_text);
2424}
2425
2426static int
2427treebuilder_set_element_tail(PyObject *element, PyObject *data)
2428{
2429 _Py_IDENTIFIER(tail);
2430 return treebuilder_set_element_text_or_tail(
2431 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2432}
2433
2434static int
2435treebuilder_add_subelement(PyObject *element, PyObject *child)
2436{
2437 _Py_IDENTIFIER(append);
2438 if (Element_CheckExact(element)) {
2439 ElementObject *elem = (ElementObject *) element;
2440 return element_add_subelement(elem, child);
2441 }
2442 else {
2443 PyObject *res;
2444 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2445 if (res == NULL)
2446 return -1;
2447 Py_DECREF(res);
2448 return 0;
2449 }
2450}
2451
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002452LOCAL(int)
2453treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2454 PyObject *node)
2455{
2456 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002457 PyObject *res;
2458 PyObject *event = PyTuple_Pack(2, action, node);
2459 if (event == NULL)
2460 return -1;
2461 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2462 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002463 if (res == NULL)
2464 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002465 Py_DECREF(res);
2466 }
2467 return 0;
2468}
2469
Antoine Pitrouee329312012-10-04 19:53:29 +02002470/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471/* handlers */
2472
2473LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2475 PyObject* attrib)
2476{
2477 PyObject* node;
2478 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002479 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002480
2481 if (self->data) {
2482 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002483 if (treebuilder_set_element_text(self->last, self->data))
2484 return NULL;
2485 }
2486 else {
2487 if (treebuilder_set_element_tail(self->last, self->data))
2488 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002489 }
2490 self->data = NULL;
2491 }
2492
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002493 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002494 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002495 } else if (attrib == Py_None) {
2496 attrib = PyDict_New();
2497 if (!attrib)
2498 return NULL;
2499 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2500 Py_DECREF(attrib);
2501 }
2502 else {
2503 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002504 }
2505 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002506 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002507 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508
Antoine Pitrouee329312012-10-04 19:53:29 +02002509 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510
2511 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002512 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002513 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002514 } else {
2515 if (self->root) {
2516 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002517 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002518 "multiple elements on top level"
2519 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002520 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521 }
2522 Py_INCREF(node);
2523 self->root = node;
2524 }
2525
2526 if (self->index < PyList_GET_SIZE(self->stack)) {
2527 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002528 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529 Py_INCREF(this);
2530 } else {
2531 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002532 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533 }
2534 self->index++;
2535
2536 Py_DECREF(this);
2537 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002538 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539
2540 Py_DECREF(self->last);
2541 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002542 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002544 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2545 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546
2547 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002548
2549 error:
2550 Py_DECREF(node);
2551 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002552}
2553
2554LOCAL(PyObject*)
2555treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2556{
2557 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002558 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002559 /* ignore calls to data before the first call to start */
2560 Py_RETURN_NONE;
2561 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002562 /* store the first item as is */
2563 Py_INCREF(data); self->data = data;
2564 } else {
2565 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002566 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2567 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002568 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002569 /* expat often generates single character data sections; handle
2570 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002571 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2572 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002574 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575 } else if (PyList_CheckExact(self->data)) {
2576 if (PyList_Append(self->data, data) < 0)
2577 return NULL;
2578 } else {
2579 PyObject* list = PyList_New(2);
2580 if (!list)
2581 return NULL;
2582 PyList_SET_ITEM(list, 0, self->data);
2583 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2584 self->data = list;
2585 }
2586 }
2587
2588 Py_RETURN_NONE;
2589}
2590
2591LOCAL(PyObject*)
2592treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2593{
2594 PyObject* item;
2595
2596 if (self->data) {
2597 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002598 if (treebuilder_set_element_text(self->last, self->data))
2599 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002601 if (treebuilder_set_element_tail(self->last, self->data))
2602 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002603 }
2604 self->data = NULL;
2605 }
2606
2607 if (self->index == 0) {
2608 PyErr_SetString(
2609 PyExc_IndexError,
2610 "pop from empty stack"
2611 );
2612 return NULL;
2613 }
2614
2615 self->index--;
2616
2617 item = PyList_GET_ITEM(self->stack, self->index);
2618 Py_INCREF(item);
2619
2620 Py_DECREF(self->last);
2621
Antoine Pitrouee329312012-10-04 19:53:29 +02002622 self->last = self->this;
2623 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002625 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627
2628 Py_INCREF(self->last);
2629 return (PyObject*) self->last;
2630}
2631
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632/* -------------------------------------------------------------------- */
2633/* methods (in alphabetical order) */
2634
Serhiy Storchakacb985562015-05-04 15:32:48 +03002635/*[clinic input]
2636_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002637
Serhiy Storchakacb985562015-05-04 15:32:48 +03002638 data: object
2639 /
2640
2641[clinic start generated code]*/
2642
2643static PyObject *
2644_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2645/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2646{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002647 return treebuilder_handle_data(self, data);
2648}
2649
Serhiy Storchakacb985562015-05-04 15:32:48 +03002650/*[clinic input]
2651_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002652
Serhiy Storchakacb985562015-05-04 15:32:48 +03002653 tag: object
2654 /
2655
2656[clinic start generated code]*/
2657
2658static PyObject *
2659_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2660/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2661{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002662 return treebuilder_handle_end(self, tag);
2663}
2664
2665LOCAL(PyObject*)
2666treebuilder_done(TreeBuilderObject* self)
2667{
2668 PyObject* res;
2669
2670 /* FIXME: check stack size? */
2671
2672 if (self->root)
2673 res = self->root;
2674 else
2675 res = Py_None;
2676
2677 Py_INCREF(res);
2678 return res;
2679}
2680
Serhiy Storchakacb985562015-05-04 15:32:48 +03002681/*[clinic input]
2682_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683
Serhiy Storchakacb985562015-05-04 15:32:48 +03002684[clinic start generated code]*/
2685
2686static PyObject *
2687_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2688/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2689{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690 return treebuilder_done(self);
2691}
2692
Serhiy Storchakacb985562015-05-04 15:32:48 +03002693/*[clinic input]
2694_elementtree.TreeBuilder.start
2695
2696 tag: object
2697 attrs: object = None
2698 /
2699
2700[clinic start generated code]*/
2701
2702static PyObject *
2703_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2704 PyObject *attrs)
2705/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002706{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002707 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708}
2709
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002710/* ==================================================================== */
2711/* the expat interface */
2712
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002715
2716/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2717 * cached globally without being in per-module state.
2718 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002719static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721
Eli Bendersky52467b12012-06-01 07:13:08 +03002722static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2723 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2724
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725typedef struct {
2726 PyObject_HEAD
2727
2728 XML_Parser parser;
2729
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002730 PyObject *target;
2731 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002733 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002735 PyObject *handle_start;
2736 PyObject *handle_data;
2737 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002739 PyObject *handle_comment;
2740 PyObject *handle_pi;
2741 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002743 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002744
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745} XMLParserObject;
2746
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002747static PyObject*
2748_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2749static PyObject *
2750_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2751 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002752
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753/* helpers */
2754
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755LOCAL(PyObject*)
2756makeuniversal(XMLParserObject* self, const char* string)
2757{
2758 /* convert a UTF-8 tag/attribute name from the expat parser
2759 to a universal name string */
2760
Antoine Pitrouc1948842012-10-01 23:40:37 +02002761 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762 PyObject* key;
2763 PyObject* value;
2764
2765 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002766 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767 if (!key)
2768 return NULL;
2769
2770 value = PyDict_GetItem(self->names, key);
2771
2772 if (value) {
2773 Py_INCREF(value);
2774 } else {
2775 /* new name. convert to universal name, and decode as
2776 necessary */
2777
2778 PyObject* tag;
2779 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002780 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781
2782 /* look for namespace separator */
2783 for (i = 0; i < size; i++)
2784 if (string[i] == '}')
2785 break;
2786 if (i != size) {
2787 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002789 if (tag == NULL) {
2790 Py_DECREF(key);
2791 return NULL;
2792 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002793 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002794 p[0] = '{';
2795 memcpy(p+1, string, size);
2796 size++;
2797 } else {
2798 /* plain name; use key as tag */
2799 Py_INCREF(key);
2800 tag = key;
2801 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002802
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002803 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002804 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002805 value = PyUnicode_DecodeUTF8(p, size, "strict");
2806 Py_DECREF(tag);
2807 if (!value) {
2808 Py_DECREF(key);
2809 return NULL;
2810 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002811
2812 /* add to names dictionary */
2813 if (PyDict_SetItem(self->names, key, value) < 0) {
2814 Py_DECREF(key);
2815 Py_DECREF(value);
2816 return NULL;
2817 }
2818 }
2819
2820 Py_DECREF(key);
2821 return value;
2822}
2823
Eli Bendersky5b77d812012-03-16 08:20:05 +02002824/* Set the ParseError exception with the given parameters.
2825 * If message is not NULL, it's used as the error string. Otherwise, the
2826 * message string is the default for the given error_code.
2827*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002829expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2830 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002831{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002832 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002833 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002834
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002835 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002836 message ? message : EXPAT(ErrorString)(error_code),
2837 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002838 if (errmsg == NULL)
2839 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002840
Eli Bendersky532d03e2013-08-10 08:00:39 -07002841 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002842 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002843 if (!error)
2844 return;
2845
Eli Bendersky5b77d812012-03-16 08:20:05 +02002846 /* Add code and position attributes */
2847 code = PyLong_FromLong((long)error_code);
2848 if (!code) {
2849 Py_DECREF(error);
2850 return;
2851 }
2852 if (PyObject_SetAttrString(error, "code", code) == -1) {
2853 Py_DECREF(error);
2854 Py_DECREF(code);
2855 return;
2856 }
2857 Py_DECREF(code);
2858
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002859 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860 if (!position) {
2861 Py_DECREF(error);
2862 return;
2863 }
2864 if (PyObject_SetAttrString(error, "position", position) == -1) {
2865 Py_DECREF(error);
2866 Py_DECREF(position);
2867 return;
2868 }
2869 Py_DECREF(position);
2870
Eli Bendersky532d03e2013-08-10 08:00:39 -07002871 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002872 Py_DECREF(error);
2873}
2874
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002875/* -------------------------------------------------------------------- */
2876/* handlers */
2877
2878static void
2879expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2880 int data_len)
2881{
2882 PyObject* key;
2883 PyObject* value;
2884 PyObject* res;
2885
2886 if (data_len < 2 || data_in[0] != '&')
2887 return;
2888
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002889 if (PyErr_Occurred())
2890 return;
2891
Neal Norwitz0269b912007-08-08 06:56:02 +00002892 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002893 if (!key)
2894 return;
2895
2896 value = PyDict_GetItem(self->entity, key);
2897
2898 if (value) {
2899 if (TreeBuilder_CheckExact(self->target))
2900 res = treebuilder_handle_data(
2901 (TreeBuilderObject*) self->target, value
2902 );
2903 else if (self->handle_data)
2904 res = PyObject_CallFunction(self->handle_data, "O", value);
2905 else
2906 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002907 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002908 } else if (!PyErr_Occurred()) {
2909 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002910 char message[128] = "undefined entity ";
2911 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002912 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002913 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002915 EXPAT(GetErrorColumnNumber)(self->parser),
2916 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002917 );
2918 }
2919
2920 Py_DECREF(key);
2921}
2922
2923static void
2924expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2925 const XML_Char **attrib_in)
2926{
2927 PyObject* res;
2928 PyObject* tag;
2929 PyObject* attrib;
2930 int ok;
2931
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002932 if (PyErr_Occurred())
2933 return;
2934
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935 /* tag name */
2936 tag = makeuniversal(self, tag_in);
2937 if (!tag)
2938 return; /* parser will look for errors */
2939
2940 /* attributes */
2941 if (attrib_in[0]) {
2942 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002943 if (!attrib) {
2944 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002946 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002947 while (attrib_in[0] && attrib_in[1]) {
2948 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002949 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002950 if (!key || !value) {
2951 Py_XDECREF(value);
2952 Py_XDECREF(key);
2953 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002954 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 return;
2956 }
2957 ok = PyDict_SetItem(attrib, key, value);
2958 Py_DECREF(value);
2959 Py_DECREF(key);
2960 if (ok < 0) {
2961 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002962 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002963 return;
2964 }
2965 attrib_in += 2;
2966 }
2967 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002968 Py_INCREF(Py_None);
2969 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002970 }
2971
2972 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973 /* shortcut */
2974 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2975 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002976 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002977 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002978 if (attrib == Py_None) {
2979 Py_DECREF(attrib);
2980 attrib = PyDict_New();
2981 if (!attrib) {
2982 Py_DECREF(tag);
2983 return;
2984 }
2985 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002987 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 res = NULL;
2989
2990 Py_DECREF(tag);
2991 Py_DECREF(attrib);
2992
2993 Py_XDECREF(res);
2994}
2995
2996static void
2997expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2998 int data_len)
2999{
3000 PyObject* data;
3001 PyObject* res;
3002
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003003 if (PyErr_Occurred())
3004 return;
3005
Neal Norwitz0269b912007-08-08 06:56:02 +00003006 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003007 if (!data)
3008 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009
3010 if (TreeBuilder_CheckExact(self->target))
3011 /* shortcut */
3012 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3013 else if (self->handle_data)
3014 res = PyObject_CallFunction(self->handle_data, "O", data);
3015 else
3016 res = NULL;
3017
3018 Py_DECREF(data);
3019
3020 Py_XDECREF(res);
3021}
3022
3023static void
3024expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3025{
3026 PyObject* tag;
3027 PyObject* res = NULL;
3028
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003029 if (PyErr_Occurred())
3030 return;
3031
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003032 if (TreeBuilder_CheckExact(self->target))
3033 /* shortcut */
3034 /* the standard tree builder doesn't look at the end tag */
3035 res = treebuilder_handle_end(
3036 (TreeBuilderObject*) self->target, Py_None
3037 );
3038 else if (self->handle_end) {
3039 tag = makeuniversal(self, tag_in);
3040 if (tag) {
3041 res = PyObject_CallFunction(self->handle_end, "O", tag);
3042 Py_DECREF(tag);
3043 }
3044 }
3045
3046 Py_XDECREF(res);
3047}
3048
3049static void
3050expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3051 const XML_Char *uri)
3052{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003053 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3054 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003055
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003056 if (PyErr_Occurred())
3057 return;
3058
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003059 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003060 return;
3061
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003062 if (!uri)
3063 uri = "";
3064 if (!prefix)
3065 prefix = "";
3066
3067 parcel = Py_BuildValue("ss", prefix, uri);
3068 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003069 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003070 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3071 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003072}
3073
3074static void
3075expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3076{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003077 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3078
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003079 if (PyErr_Occurred())
3080 return;
3081
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003082 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003083 return;
3084
3085 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003086}
3087
3088static void
3089expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3090{
3091 PyObject* comment;
3092 PyObject* res;
3093
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003094 if (PyErr_Occurred())
3095 return;
3096
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003097 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003098 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003099 if (comment) {
3100 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3101 Py_XDECREF(res);
3102 Py_DECREF(comment);
3103 }
3104 }
3105}
3106
Eli Bendersky45839902013-01-13 05:14:47 -08003107static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003108expat_start_doctype_handler(XMLParserObject *self,
3109 const XML_Char *doctype_name,
3110 const XML_Char *sysid,
3111 const XML_Char *pubid,
3112 int has_internal_subset)
3113{
3114 PyObject *self_pyobj = (PyObject *)self;
3115 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3116 PyObject *parser_doctype = NULL;
3117 PyObject *res = NULL;
3118
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003119 if (PyErr_Occurred())
3120 return;
3121
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003122 doctype_name_obj = makeuniversal(self, doctype_name);
3123 if (!doctype_name_obj)
3124 return;
3125
3126 if (sysid) {
3127 sysid_obj = makeuniversal(self, sysid);
3128 if (!sysid_obj) {
3129 Py_DECREF(doctype_name_obj);
3130 return;
3131 }
3132 } else {
3133 Py_INCREF(Py_None);
3134 sysid_obj = Py_None;
3135 }
3136
3137 if (pubid) {
3138 pubid_obj = makeuniversal(self, pubid);
3139 if (!pubid_obj) {
3140 Py_DECREF(doctype_name_obj);
3141 Py_DECREF(sysid_obj);
3142 return;
3143 }
3144 } else {
3145 Py_INCREF(Py_None);
3146 pubid_obj = Py_None;
3147 }
3148
3149 /* If the target has a handler for doctype, call it. */
3150 if (self->handle_doctype) {
3151 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3152 doctype_name_obj, pubid_obj, sysid_obj);
3153 Py_CLEAR(res);
3154 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003155 else {
3156 /* Now see if the parser itself has a doctype method. If yes and it's
3157 * a custom method, call it but warn about deprecation. If it's only
3158 * the vanilla XMLParser method, do nothing.
3159 */
3160 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3161 if (parser_doctype &&
3162 !(PyCFunction_Check(parser_doctype) &&
3163 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3164 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003165 (PyCFunction) _elementtree_XMLParser_doctype)) {
3166 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3167 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003168 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003169 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003170 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003171 res = PyObject_CallFunction(parser_doctype, "OOO",
3172 doctype_name_obj, pubid_obj, sysid_obj);
3173 Py_CLEAR(res);
3174 }
3175 }
3176
3177clear:
3178 Py_XDECREF(parser_doctype);
3179 Py_DECREF(doctype_name_obj);
3180 Py_DECREF(pubid_obj);
3181 Py_DECREF(sysid_obj);
3182}
3183
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003184static void
3185expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3186 const XML_Char* data_in)
3187{
3188 PyObject* target;
3189 PyObject* data;
3190 PyObject* res;
3191
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003192 if (PyErr_Occurred())
3193 return;
3194
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003195 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003196 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3197 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003198 if (target && data) {
3199 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3200 Py_XDECREF(res);
3201 Py_DECREF(data);
3202 Py_DECREF(target);
3203 } else {
3204 Py_XDECREF(data);
3205 Py_XDECREF(target);
3206 }
3207 }
3208}
3209
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003210/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003211
Eli Bendersky52467b12012-06-01 07:13:08 +03003212static PyObject *
3213xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003214{
Eli Bendersky52467b12012-06-01 07:13:08 +03003215 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3216 if (self) {
3217 self->parser = NULL;
3218 self->target = self->entity = self->names = NULL;
3219 self->handle_start = self->handle_data = self->handle_end = NULL;
3220 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003221 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003222 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003223 return (PyObject *)self;
3224}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003225
Serhiy Storchakacb985562015-05-04 15:32:48 +03003226/*[clinic input]
3227_elementtree.XMLParser.__init__
3228
3229 html: object = NULL
3230 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003231 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003232
3233[clinic start generated code]*/
3234
Eli Bendersky52467b12012-06-01 07:13:08 +03003235static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003236_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3237 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003238/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003239{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003240 self->entity = PyDict_New();
3241 if (!self->entity)
3242 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243
Serhiy Storchakacb985562015-05-04 15:32:48 +03003244 self->names = PyDict_New();
3245 if (!self->names) {
3246 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003247 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003249
Serhiy Storchakacb985562015-05-04 15:32:48 +03003250 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3251 if (!self->parser) {
3252 Py_CLEAR(self->entity);
3253 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003254 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003255 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256 }
3257
Eli Bendersky52467b12012-06-01 07:13:08 +03003258 if (target) {
3259 Py_INCREF(target);
3260 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003261 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003262 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003263 Py_CLEAR(self->entity);
3264 Py_CLEAR(self->names);
3265 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003266 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003268 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003269 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270
Serhiy Storchakacb985562015-05-04 15:32:48 +03003271 self->handle_start = PyObject_GetAttrString(target, "start");
3272 self->handle_data = PyObject_GetAttrString(target, "data");
3273 self->handle_end = PyObject_GetAttrString(target, "end");
3274 self->handle_comment = PyObject_GetAttrString(target, "comment");
3275 self->handle_pi = PyObject_GetAttrString(target, "pi");
3276 self->handle_close = PyObject_GetAttrString(target, "close");
3277 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278
3279 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003280
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003281 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003282 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003284 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285 (XML_StartElementHandler) expat_start_handler,
3286 (XML_EndElementHandler) expat_end_handler
3287 );
3288 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003289 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 (XML_DefaultHandler) expat_default_handler
3291 );
3292 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 (XML_CharacterDataHandler) expat_data_handler
3295 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003298 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 (XML_CommentHandler) expat_comment_handler
3300 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003303 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 (XML_ProcessingInstructionHandler) expat_pi_handler
3305 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003306 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003307 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003308 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3309 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003310 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003311 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003312 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003313 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314
Eli Bendersky52467b12012-06-01 07:13:08 +03003315 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003316}
3317
Eli Bendersky52467b12012-06-01 07:13:08 +03003318static int
3319xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3320{
3321 Py_VISIT(self->handle_close);
3322 Py_VISIT(self->handle_pi);
3323 Py_VISIT(self->handle_comment);
3324 Py_VISIT(self->handle_end);
3325 Py_VISIT(self->handle_data);
3326 Py_VISIT(self->handle_start);
3327
3328 Py_VISIT(self->target);
3329 Py_VISIT(self->entity);
3330 Py_VISIT(self->names);
3331
3332 return 0;
3333}
3334
3335static int
3336xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337{
3338 EXPAT(ParserFree)(self->parser);
3339
Antoine Pitrouc1948842012-10-01 23:40:37 +02003340 Py_CLEAR(self->handle_close);
3341 Py_CLEAR(self->handle_pi);
3342 Py_CLEAR(self->handle_comment);
3343 Py_CLEAR(self->handle_end);
3344 Py_CLEAR(self->handle_data);
3345 Py_CLEAR(self->handle_start);
3346 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347
Antoine Pitrouc1948842012-10-01 23:40:37 +02003348 Py_CLEAR(self->target);
3349 Py_CLEAR(self->entity);
3350 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003351
Eli Bendersky52467b12012-06-01 07:13:08 +03003352 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003353}
3354
Eli Bendersky52467b12012-06-01 07:13:08 +03003355static void
3356xmlparser_dealloc(XMLParserObject* self)
3357{
3358 PyObject_GC_UnTrack(self);
3359 xmlparser_gc_clear(self);
3360 Py_TYPE(self)->tp_free((PyObject *)self);
3361}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003362
3363LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003364expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003365{
3366 int ok;
3367
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003368 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3370
3371 if (PyErr_Occurred())
3372 return NULL;
3373
3374 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003375 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003376 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003377 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003378 EXPAT(GetErrorColumnNumber)(self->parser),
3379 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380 );
3381 return NULL;
3382 }
3383
3384 Py_RETURN_NONE;
3385}
3386
Serhiy Storchakacb985562015-05-04 15:32:48 +03003387/*[clinic input]
3388_elementtree.XMLParser.close
3389
3390[clinic start generated code]*/
3391
3392static PyObject *
3393_elementtree_XMLParser_close_impl(XMLParserObject *self)
3394/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003395{
3396 /* end feeding data to parser */
3397
3398 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003400 if (!res)
3401 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003403 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404 Py_DECREF(res);
3405 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003406 }
3407 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003408 Py_DECREF(res);
3409 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003410 }
3411 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003412 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003413 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003414}
3415
Serhiy Storchakacb985562015-05-04 15:32:48 +03003416/*[clinic input]
3417_elementtree.XMLParser.feed
3418
3419 data: object
3420 /
3421
3422[clinic start generated code]*/
3423
3424static PyObject *
3425_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3426/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003427{
3428 /* feed data to parser */
3429
Serhiy Storchakacb985562015-05-04 15:32:48 +03003430 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003431 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003432 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3433 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003434 return NULL;
3435 if (data_len > INT_MAX) {
3436 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3437 return NULL;
3438 }
3439 /* Explicitly set UTF-8 encoding. Return code ignored. */
3440 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003441 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003442 }
3443 else {
3444 Py_buffer view;
3445 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003446 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003447 return NULL;
3448 if (view.len > INT_MAX) {
3449 PyBuffer_Release(&view);
3450 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3451 return NULL;
3452 }
3453 res = expat_parse(self, view.buf, (int)view.len, 0);
3454 PyBuffer_Release(&view);
3455 return res;
3456 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003457}
3458
Serhiy Storchakacb985562015-05-04 15:32:48 +03003459/*[clinic input]
3460_elementtree.XMLParser._parse_whole
3461
3462 file: object
3463 /
3464
3465[clinic start generated code]*/
3466
3467static PyObject *
3468_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3469/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003470{
Eli Benderskya3699232013-05-19 18:47:23 -07003471 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003472 PyObject* reader;
3473 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003474 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003475 PyObject* res;
3476
Serhiy Storchakacb985562015-05-04 15:32:48 +03003477 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003478 if (!reader)
3479 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003480
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003481 /* read from open file object */
3482 for (;;) {
3483
3484 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3485
3486 if (!buffer) {
3487 /* read failed (e.g. due to KeyboardInterrupt) */
3488 Py_DECREF(reader);
3489 return NULL;
3490 }
3491
Eli Benderskyf996e772012-03-16 05:53:30 +02003492 if (PyUnicode_CheckExact(buffer)) {
3493 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003494 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003495 Py_DECREF(buffer);
3496 break;
3497 }
3498 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003499 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003500 if (!temp) {
3501 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003502 Py_DECREF(reader);
3503 return NULL;
3504 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003505 buffer = temp;
3506 }
3507 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508 Py_DECREF(buffer);
3509 break;
3510 }
3511
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003512 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3513 Py_DECREF(buffer);
3514 Py_DECREF(reader);
3515 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3516 return NULL;
3517 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003518 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003519 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003520 );
3521
3522 Py_DECREF(buffer);
3523
3524 if (!res) {
3525 Py_DECREF(reader);
3526 return NULL;
3527 }
3528 Py_DECREF(res);
3529
3530 }
3531
3532 Py_DECREF(reader);
3533
3534 res = expat_parse(self, "", 0, 1);
3535
3536 if (res && TreeBuilder_CheckExact(self->target)) {
3537 Py_DECREF(res);
3538 return treebuilder_done((TreeBuilderObject*) self->target);
3539 }
3540
3541 return res;
3542}
3543
Serhiy Storchakacb985562015-05-04 15:32:48 +03003544/*[clinic input]
3545_elementtree.XMLParser.doctype
3546
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003547 name: object
3548 pubid: object
3549 system: object
3550 /
3551
Serhiy Storchakacb985562015-05-04 15:32:48 +03003552[clinic start generated code]*/
3553
3554static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003555_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3556 PyObject *pubid, PyObject *system)
3557/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003558{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003559 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3560 "This method of XMLParser is deprecated. Define"
3561 " doctype() method on the TreeBuilder target.",
3562 1) < 0) {
3563 return NULL;
3564 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003565 Py_RETURN_NONE;
3566}
3567
Serhiy Storchakacb985562015-05-04 15:32:48 +03003568/*[clinic input]
3569_elementtree.XMLParser._setevents
3570
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003571 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003572 events_to_report: object = None
3573 /
3574
3575[clinic start generated code]*/
3576
3577static PyObject *
3578_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3579 PyObject *events_queue,
3580 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003581/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582{
3583 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003584 Py_ssize_t i, seqlen;
3585 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003586 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003587
3588 if (!TreeBuilder_CheckExact(self->target)) {
3589 PyErr_SetString(
3590 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003591 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003592 "targets"
3593 );
3594 return NULL;
3595 }
3596
3597 target = (TreeBuilderObject*) self->target;
3598
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003599 events_append = PyObject_GetAttrString(events_queue, "append");
3600 if (events_append == NULL)
3601 return NULL;
3602 Py_XDECREF(target->events_append);
3603 target->events_append = events_append;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
3605 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003606 Py_CLEAR(target->start_event_obj);
3607 Py_CLEAR(target->end_event_obj);
3608 Py_CLEAR(target->start_ns_event_obj);
3609 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003610
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003611 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003612 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003613 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 Py_RETURN_NONE;
3615 }
3616
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003617 if (!(events_seq = PySequence_Fast(events_to_report,
3618 "events must be a sequence"))) {
3619 return NULL;
3620 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003622 seqlen = PySequence_Size(events_seq);
3623 for (i = 0; i < seqlen; ++i) {
3624 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3625 char *event_name = NULL;
3626 if (PyUnicode_Check(event_name_obj)) {
3627 event_name = _PyUnicode_AsString(event_name_obj);
3628 } else if (PyBytes_Check(event_name_obj)) {
3629 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003630 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003631
3632 if (event_name == NULL) {
3633 Py_DECREF(events_seq);
3634 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3635 return NULL;
3636 } else if (strcmp(event_name, "start") == 0) {
3637 Py_INCREF(event_name_obj);
3638 target->start_event_obj = event_name_obj;
3639 } else if (strcmp(event_name, "end") == 0) {
3640 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003641 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003642 target->end_event_obj = event_name_obj;
3643 } else if (strcmp(event_name, "start-ns") == 0) {
3644 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003645 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003646 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003647 EXPAT(SetNamespaceDeclHandler)(
3648 self->parser,
3649 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3650 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3651 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003652 } else if (strcmp(event_name, "end-ns") == 0) {
3653 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003654 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003655 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003656 EXPAT(SetNamespaceDeclHandler)(
3657 self->parser,
3658 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3659 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3660 );
3661 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003662 Py_DECREF(events_seq);
3663 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664 return NULL;
3665 }
3666 }
3667
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003668 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003670}
3671
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003672static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003673xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003674{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003675 if (PyUnicode_Check(nameobj)) {
3676 PyObject* res;
3677 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3678 res = self->entity;
3679 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3680 res = self->target;
3681 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3682 return PyUnicode_FromFormat(
3683 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003685 }
3686 else
3687 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003688
Alexander Belopolskye239d232010-12-08 23:31:48 +00003689 Py_INCREF(res);
3690 return res;
3691 }
3692 generic:
3693 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003694}
3695
Serhiy Storchakacb985562015-05-04 15:32:48 +03003696#include "clinic/_elementtree.c.h"
3697
3698static PyMethodDef element_methods[] = {
3699
3700 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3701
3702 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3703 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3704
3705 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3706 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3707 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3708
3709 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3710 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3711 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3712 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3713
3714 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3715 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3716 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3717
3718 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3719 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3720
3721 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3722 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3723
3724 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3725
3726 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3727 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3728 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3729 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3730 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3731
3732 {NULL, NULL}
3733};
3734
3735static PyMappingMethods element_as_mapping = {
3736 (lenfunc) element_length,
3737 (binaryfunc) element_subscr,
3738 (objobjargproc) element_ass_subscr,
3739};
3740
Serhiy Storchakadde08152015-11-25 15:28:13 +02003741static PyGetSetDef element_getsetlist[] = {
3742 {"tag",
3743 (getter)element_tag_getter,
3744 (setter)element_tag_setter,
3745 "A string identifying what kind of data this element represents"},
3746 {"text",
3747 (getter)element_text_getter,
3748 (setter)element_text_setter,
3749 "A string of text directly after the start tag, or None"},
3750 {"tail",
3751 (getter)element_tail_getter,
3752 (setter)element_tail_setter,
3753 "A string of text directly after the end tag, or None"},
3754 {"attrib",
3755 (getter)element_attrib_getter,
3756 (setter)element_attrib_setter,
3757 "A dictionary containing the element's attributes"},
3758 {NULL},
3759};
3760
Serhiy Storchakacb985562015-05-04 15:32:48 +03003761static PyTypeObject Element_Type = {
3762 PyVarObject_HEAD_INIT(NULL, 0)
3763 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3764 /* methods */
3765 (destructor)element_dealloc, /* tp_dealloc */
3766 0, /* tp_print */
3767 0, /* tp_getattr */
3768 0, /* tp_setattr */
3769 0, /* tp_reserved */
3770 (reprfunc)element_repr, /* tp_repr */
3771 0, /* tp_as_number */
3772 &element_as_sequence, /* tp_as_sequence */
3773 &element_as_mapping, /* tp_as_mapping */
3774 0, /* tp_hash */
3775 0, /* tp_call */
3776 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003777 PyObject_GenericGetAttr, /* tp_getattro */
3778 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003779 0, /* tp_as_buffer */
3780 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3781 /* tp_flags */
3782 0, /* tp_doc */
3783 (traverseproc)element_gc_traverse, /* tp_traverse */
3784 (inquiry)element_gc_clear, /* tp_clear */
3785 0, /* tp_richcompare */
3786 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3787 0, /* tp_iter */
3788 0, /* tp_iternext */
3789 element_methods, /* tp_methods */
3790 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003791 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003792 0, /* tp_base */
3793 0, /* tp_dict */
3794 0, /* tp_descr_get */
3795 0, /* tp_descr_set */
3796 0, /* tp_dictoffset */
3797 (initproc)element_init, /* tp_init */
3798 PyType_GenericAlloc, /* tp_alloc */
3799 element_new, /* tp_new */
3800 0, /* tp_free */
3801};
3802
3803static PyMethodDef treebuilder_methods[] = {
3804 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3805 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3806 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3807 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3808 {NULL, NULL}
3809};
3810
3811static PyTypeObject TreeBuilder_Type = {
3812 PyVarObject_HEAD_INIT(NULL, 0)
3813 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3814 /* methods */
3815 (destructor)treebuilder_dealloc, /* tp_dealloc */
3816 0, /* tp_print */
3817 0, /* tp_getattr */
3818 0, /* tp_setattr */
3819 0, /* tp_reserved */
3820 0, /* tp_repr */
3821 0, /* tp_as_number */
3822 0, /* tp_as_sequence */
3823 0, /* tp_as_mapping */
3824 0, /* tp_hash */
3825 0, /* tp_call */
3826 0, /* tp_str */
3827 0, /* tp_getattro */
3828 0, /* tp_setattro */
3829 0, /* tp_as_buffer */
3830 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3831 /* tp_flags */
3832 0, /* tp_doc */
3833 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3834 (inquiry)treebuilder_gc_clear, /* tp_clear */
3835 0, /* tp_richcompare */
3836 0, /* tp_weaklistoffset */
3837 0, /* tp_iter */
3838 0, /* tp_iternext */
3839 treebuilder_methods, /* tp_methods */
3840 0, /* tp_members */
3841 0, /* tp_getset */
3842 0, /* tp_base */
3843 0, /* tp_dict */
3844 0, /* tp_descr_get */
3845 0, /* tp_descr_set */
3846 0, /* tp_dictoffset */
3847 _elementtree_TreeBuilder___init__, /* tp_init */
3848 PyType_GenericAlloc, /* tp_alloc */
3849 treebuilder_new, /* tp_new */
3850 0, /* tp_free */
3851};
3852
3853static PyMethodDef xmlparser_methods[] = {
3854 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3855 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3856 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3857 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3858 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3859 {NULL, NULL}
3860};
3861
Neal Norwitz227b5332006-03-22 09:28:35 +00003862static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003863 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003864 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003865 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003866 (destructor)xmlparser_dealloc, /* tp_dealloc */
3867 0, /* tp_print */
3868 0, /* tp_getattr */
3869 0, /* tp_setattr */
3870 0, /* tp_reserved */
3871 0, /* tp_repr */
3872 0, /* tp_as_number */
3873 0, /* tp_as_sequence */
3874 0, /* tp_as_mapping */
3875 0, /* tp_hash */
3876 0, /* tp_call */
3877 0, /* tp_str */
3878 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3879 0, /* tp_setattro */
3880 0, /* tp_as_buffer */
3881 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3882 /* tp_flags */
3883 0, /* tp_doc */
3884 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3885 (inquiry)xmlparser_gc_clear, /* tp_clear */
3886 0, /* tp_richcompare */
3887 0, /* tp_weaklistoffset */
3888 0, /* tp_iter */
3889 0, /* tp_iternext */
3890 xmlparser_methods, /* tp_methods */
3891 0, /* tp_members */
3892 0, /* tp_getset */
3893 0, /* tp_base */
3894 0, /* tp_dict */
3895 0, /* tp_descr_get */
3896 0, /* tp_descr_set */
3897 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003898 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003899 PyType_GenericAlloc, /* tp_alloc */
3900 xmlparser_new, /* tp_new */
3901 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003902};
3903
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003904/* ==================================================================== */
3905/* python module interface */
3906
3907static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003908 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003909 {NULL, NULL}
3910};
3911
Martin v. Löwis1a214512008-06-11 05:26:20 +00003912
Eli Bendersky532d03e2013-08-10 08:00:39 -07003913static struct PyModuleDef elementtreemodule = {
3914 PyModuleDef_HEAD_INIT,
3915 "_elementtree",
3916 NULL,
3917 sizeof(elementtreestate),
3918 _functions,
3919 NULL,
3920 elementtree_traverse,
3921 elementtree_clear,
3922 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003923};
3924
Neal Norwitzf6657e62006-12-28 04:47:50 +00003925PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003926PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003927{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003928 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003929 elementtreestate *st;
3930
3931 m = PyState_FindModule(&elementtreemodule);
3932 if (m) {
3933 Py_INCREF(m);
3934 return m;
3935 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003936
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003937 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003938 if (PyType_Ready(&ElementIter_Type) < 0)
3939 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003940 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003941 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003942 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003943 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003944 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003945 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003946
Eli Bendersky532d03e2013-08-10 08:00:39 -07003947 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003948 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003949 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003950 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003951
Eli Bendersky828efde2012-04-05 05:40:58 +03003952 if (!(temp = PyImport_ImportModule("copy")))
3953 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003954 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003955 Py_XDECREF(temp);
3956
Eli Bendersky532d03e2013-08-10 08:00:39 -07003957 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003958 return NULL;
3959
Eli Bendersky20d41742012-06-01 09:48:37 +03003960 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003961 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3962 if (expat_capi) {
3963 /* check that it's usable */
3964 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003965 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003966 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3967 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003968 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003969 PyErr_SetString(PyExc_ImportError,
3970 "pyexpat version is incompatible");
3971 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003972 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003973 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003974 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003975 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003976
Eli Bendersky532d03e2013-08-10 08:00:39 -07003977 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003978 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003979 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003980 Py_INCREF(st->parseerror_obj);
3981 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003982
Eli Bendersky092af1f2012-03-04 07:14:03 +02003983 Py_INCREF((PyObject *)&Element_Type);
3984 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3985
Eli Bendersky58d548d2012-05-29 15:45:16 +03003986 Py_INCREF((PyObject *)&TreeBuilder_Type);
3987 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3988
Eli Bendersky52467b12012-06-01 07:13:08 +03003989 Py_INCREF((PyObject *)&XMLParser_Type);
3990 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003991
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003992 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003993}