blob: 7d50dd07a961f2e9d85bd1d09647094f8b5876cb [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots stored inline in an element's extra record;
   an element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the first branch enabled, ALLOC and
   RELEASE maintain a running byte count and print it together with the
   given comment; in the default branch both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private function
   returning `type`.  Under MSVC the function is additionally declared
   __inline and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* Macros used to keep a 'join' flag in the low bit of string object
   pointers.  Every use of text and tail as object pointers must be
   wrapped in JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)       - p with the flag bit masked off, as a PyObject*
   JOIN_GET(p)       - the flag bit of p (0 or 1)
   JOIN_SET(p, flag) - p with its flag bit masked off, or'ed with flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
/* True when op's type is exactly Element_Type (subclasses do not match). */
#define Element_CheckExact(op) (&Element_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700641 _clear_joined_ptr(&self->text);
642 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643
644 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300645 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 return 0;
649}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200655 Py_TRASHCAN_SAFE_BEGIN(self)
Eli Benderskyebf37a22012-04-03 22:02:37 +0300656
657 if (self->weakreflist != NULL)
658 PyObject_ClearWeakRefs((PyObject *) self);
659
Eli Bendersky0192ba32012-03-30 16:38:33 +0300660 /* element_gc_clear clears all references and deallocates extra
661 */
662 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200665 Py_TYPE(self)->tp_free((PyObject *)self);
Serhiy Storchaka18f018c2016-12-21 12:32:56 +0200666 Py_TRASHCAN_SAFE_END(self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000667}
668
669/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000670
Serhiy Storchakacb985562015-05-04 15:32:48 +0300671/*[clinic input]
672_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000673
Serhiy Storchakacb985562015-05-04 15:32:48 +0300674 subelement: object(subclass_of='&Element_Type')
675 /
676
677[clinic start generated code]*/
678
679static PyObject *
680_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
681/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
682{
683 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684 return NULL;
685
686 Py_RETURN_NONE;
687}
688
Serhiy Storchakacb985562015-05-04 15:32:48 +0300689/*[clinic input]
690_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000691
Serhiy Storchakacb985562015-05-04 15:32:48 +0300692[clinic start generated code]*/
693
694static PyObject *
695_elementtree_Element_clear_impl(ElementObject *self)
696/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
697{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300698 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000699
700 Py_INCREF(Py_None);
701 Py_DECREF(JOIN_OBJ(self->text));
702 self->text = Py_None;
703
704 Py_INCREF(Py_None);
705 Py_DECREF(JOIN_OBJ(self->tail));
706 self->tail = Py_None;
707
708 Py_RETURN_NONE;
709}
710
Serhiy Storchakacb985562015-05-04 15:32:48 +0300711/*[clinic input]
712_elementtree.Element.__copy__
713
714[clinic start generated code]*/
715
716static PyObject *
717_elementtree_Element___copy___impl(ElementObject *self)
718/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200720 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 ElementObject* element;
722
Eli Bendersky092af1f2012-03-04 07:14:03 +0200723 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800724 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000725 if (!element)
726 return NULL;
727
728 Py_DECREF(JOIN_OBJ(element->text));
729 element->text = self->text;
730 Py_INCREF(JOIN_OBJ(element->text));
731
732 Py_DECREF(JOIN_OBJ(element->tail));
733 element->tail = self->tail;
734 Py_INCREF(JOIN_OBJ(element->tail));
735
736 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000737 if (element_resize(element, self->extra->length) < 0) {
738 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000740 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000741
742 for (i = 0; i < self->extra->length; i++) {
743 Py_INCREF(self->extra->children[i]);
744 element->extra->children[i] = self->extra->children[i];
745 }
746
747 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000748 }
749
750 return (PyObject*) element;
751}
752
Serhiy Storchakacb985562015-05-04 15:32:48 +0300753/*[clinic input]
754_elementtree.Element.__deepcopy__
755
756 memo: object
757 /
758
759[clinic start generated code]*/
760
761static PyObject *
762_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
763/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200765 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766 ElementObject* element;
767 PyObject* tag;
768 PyObject* attrib;
769 PyObject* text;
770 PyObject* tail;
771 PyObject* id;
772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 tag = deepcopy(self->tag, memo);
774 if (!tag)
775 return NULL;
776
777 if (self->extra) {
778 attrib = deepcopy(self->extra->attrib, memo);
779 if (!attrib) {
780 Py_DECREF(tag);
781 return NULL;
782 }
783 } else {
784 Py_INCREF(Py_None);
785 attrib = Py_None;
786 }
787
Eli Bendersky092af1f2012-03-04 07:14:03 +0200788 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000789
790 Py_DECREF(tag);
791 Py_DECREF(attrib);
792
793 if (!element)
794 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100795
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000796 text = deepcopy(JOIN_OBJ(self->text), memo);
797 if (!text)
798 goto error;
799 Py_DECREF(element->text);
800 element->text = JOIN_SET(text, JOIN_GET(self->text));
801
802 tail = deepcopy(JOIN_OBJ(self->tail), memo);
803 if (!tail)
804 goto error;
805 Py_DECREF(element->tail);
806 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
807
808 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000809 if (element_resize(element, self->extra->length) < 0)
810 goto error;
811
812 for (i = 0; i < self->extra->length; i++) {
813 PyObject* child = deepcopy(self->extra->children[i], memo);
814 if (!child) {
815 element->extra->length = i;
816 goto error;
817 }
818 element->extra->children[i] = child;
819 }
820
821 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000822 }
823
824 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200825 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000826 if (!id)
827 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000828
829 i = PyDict_SetItem(memo, id, (PyObject*) element);
830
831 Py_DECREF(id);
832
833 if (i < 0)
834 goto error;
835
836 return (PyObject*) element;
837
838 error:
839 Py_DECREF(element);
840 return NULL;
841}
842
Serhiy Storchakacb985562015-05-04 15:32:48 +0300843/*[clinic input]
844_elementtree.Element.__sizeof__ -> Py_ssize_t
845
846[clinic start generated code]*/
847
848static Py_ssize_t
849_elementtree_Element___sizeof___impl(ElementObject *self)
850/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200851{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200852 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200853 if (self->extra) {
854 result += sizeof(ElementObjectExtra);
855 if (self->extra->children != self->extra->_children)
856 result += sizeof(PyObject*) * self->extra->allocated;
857 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300858 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200859}
860
Eli Bendersky698bdb22013-01-10 06:01:06 -0800861/* dict keys for getstate/setstate. */
862#define PICKLED_TAG "tag"
863#define PICKLED_CHILDREN "_children"
864#define PICKLED_ATTRIB "attrib"
865#define PICKLED_TAIL "tail"
866#define PICKLED_TEXT "text"
867
868/* __getstate__ returns a fabricated instance dict as in the pure-Python
869 * Element implementation, for interoperability/interchangeability. This
870 * makes the pure-Python implementation details an API, but (a) there aren't
871 * any unnecessary structures there; and (b) it buys compatibility with 3.2
872 * pickles. See issue #16076.
873 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300874/*[clinic input]
875_elementtree.Element.__getstate__
876
877[clinic start generated code]*/
878
Eli Bendersky698bdb22013-01-10 06:01:06 -0800879static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300880_elementtree_Element___getstate___impl(ElementObject *self)
881/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200883 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800884 PyObject *instancedict = NULL, *children;
885
886 /* Build a list of children. */
887 children = PyList_New(self->extra ? self->extra->length : 0);
888 if (!children)
889 return NULL;
890 for (i = 0; i < PyList_GET_SIZE(children); i++) {
891 PyObject *child = self->extra->children[i];
892 Py_INCREF(child);
893 PyList_SET_ITEM(children, i, child);
894 }
895
896 /* Construct the state object. */
897 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
898 if (noattrib)
899 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
900 PICKLED_TAG, self->tag,
901 PICKLED_CHILDREN, children,
902 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700903 PICKLED_TEXT, JOIN_OBJ(self->text),
904 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905 else
906 instancedict = Py_BuildValue("{sOsOsOsOsO}",
907 PICKLED_TAG, self->tag,
908 PICKLED_CHILDREN, children,
909 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700910 PICKLED_TEXT, JOIN_OBJ(self->text),
911 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800912 if (instancedict) {
913 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800915 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800916 else {
917 for (i = 0; i < PyList_GET_SIZE(children); i++)
918 Py_DECREF(PyList_GET_ITEM(children, i));
919 Py_DECREF(children);
920
921 return NULL;
922 }
923}
924
925static PyObject *
926element_setstate_from_attributes(ElementObject *self,
927 PyObject *tag,
928 PyObject *attrib,
929 PyObject *text,
930 PyObject *tail,
931 PyObject *children)
932{
933 Py_ssize_t i, nchildren;
934
935 if (!tag) {
936 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
937 return NULL;
938 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800939
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200940 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300941 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800942
Eli Benderskydd3661e2013-09-13 06:24:25 -0700943 _clear_joined_ptr(&self->text);
944 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
945 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800946
Eli Benderskydd3661e2013-09-13 06:24:25 -0700947 _clear_joined_ptr(&self->tail);
948 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
949 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800950
951 /* Handle ATTRIB and CHILDREN. */
952 if (!children && !attrib)
953 Py_RETURN_NONE;
954
955 /* Compute 'nchildren'. */
956 if (children) {
957 if (!PyList_Check(children)) {
958 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
959 return NULL;
960 }
961 nchildren = PyList_Size(children);
962 }
963 else {
964 nchildren = 0;
965 }
966
967 /* Allocate 'extra'. */
968 if (element_resize(self, nchildren)) {
969 return NULL;
970 }
971 assert(self->extra && self->extra->allocated >= nchildren);
972
973 /* Copy children */
974 for (i = 0; i < nchildren; i++) {
975 self->extra->children[i] = PyList_GET_ITEM(children, i);
976 Py_INCREF(self->extra->children[i]);
977 }
978
979 self->extra->length = nchildren;
980 self->extra->allocated = nchildren;
981
982 /* Stash attrib. */
983 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300985 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800986 }
987
988 Py_RETURN_NONE;
989}
990
991/* __setstate__ for Element instance from the Python implementation.
992 * 'state' should be the instance dict.
993 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300994
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995static PyObject *
996element_setstate_from_Python(ElementObject *self, PyObject *state)
997{
998 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
999 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1000 PyObject *args;
1001 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001002 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004 tag = attrib = text = tail = children = NULL;
1005 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001008
1009 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1010 &attrib, &text, &tail, &children))
1011 retval = element_setstate_from_attributes(self, tag, attrib, text,
1012 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001014 retval = NULL;
1015
1016 Py_DECREF(args);
1017 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001018}
1019
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020/*[clinic input]
1021_elementtree.Element.__setstate__
1022
1023 state: object
1024 /
1025
1026[clinic start generated code]*/
1027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001029_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1030/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031{
1032 if (!PyDict_CheckExact(state)) {
1033 PyErr_Format(PyExc_TypeError,
1034 "Don't know how to unpickle \"%.200R\" as an Element",
1035 state);
1036 return NULL;
1037 }
1038 else
1039 return element_setstate_from_Python(self, state);
1040}
1041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001042LOCAL(int)
1043checkpath(PyObject* tag)
1044{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001045 Py_ssize_t i;
1046 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047
1048 /* check if a tag contains an xpath character */
1049
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050#define PATHCHAR(ch) \
1051 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1055 void *data = PyUnicode_DATA(tag);
1056 unsigned int kind = PyUnicode_KIND(tag);
1057 for (i = 0; i < len; i++) {
1058 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1059 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 return 1;
1065 }
1066 return 0;
1067 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001068 if (PyBytes_Check(tag)) {
1069 char *p = PyBytes_AS_STRING(tag);
1070 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071 if (p[i] == '{')
1072 check = 0;
1073 else if (p[i] == '}')
1074 check = 1;
1075 else if (check && PATHCHAR(p[i]))
1076 return 1;
1077 }
1078 return 0;
1079 }
1080
1081 return 1; /* unknown type; might be path expression */
1082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.extend
1086
1087 elements: object
1088 /
1089
1090[clinic start generated code]*/
1091
1092static PyObject *
1093_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1094/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095{
1096 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001097 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098
Serhiy Storchakacb985562015-05-04 15:32:48 +03001099 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001100 if (!seq) {
1101 PyErr_Format(
1102 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001104 );
1105 return NULL;
1106 }
1107
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001108 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001109 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001110 Py_INCREF(element);
1111 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001112 PyErr_Format(
1113 PyExc_TypeError,
1114 "expected an Element, not \"%.200s\"",
1115 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001116 Py_DECREF(seq);
1117 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001118 return NULL;
1119 }
1120
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (element_add_subelement(self, element) < 0) {
1122 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 return NULL;
1125 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001126 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 }
1128
1129 Py_DECREF(seq);
1130
1131 Py_RETURN_NONE;
1132}
1133
Serhiy Storchakacb985562015-05-04 15:32:48 +03001134/*[clinic input]
1135_elementtree.Element.find
1136
1137 path: object
1138 namespaces: object = None
1139
1140[clinic start generated code]*/
1141
1142static PyObject *
1143_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1144 PyObject *namespaces)
1145/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001147 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001148 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001149
Serhiy Storchakacb985562015-05-04 15:32:48 +03001150 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001151 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001153 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001155 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156
1157 if (!self->extra)
1158 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 for (i = 0; i < self->extra->length; i++) {
1161 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001162 int rc;
1163 if (!Element_CheckExact(item))
1164 continue;
1165 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001166 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 Py_DECREF(item);
1170 if (rc < 0)
1171 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 }
1173
1174 Py_RETURN_NONE;
1175}
1176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177/*[clinic input]
1178_elementtree.Element.findtext
1179
1180 path: object
1181 default: object = None
1182 namespaces: object = None
1183
1184[clinic start generated code]*/
1185
1186static PyObject *
1187_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1188 PyObject *default_value,
1189 PyObject *namespaces)
1190/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001192 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001193 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001194 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001197 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199 );
1200
1201 if (!self->extra) {
1202 Py_INCREF(default_value);
1203 return default_value;
1204 }
1205
1206 for (i = 0; i < self->extra->length; i++) {
1207 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001208 int rc;
1209 if (!Element_CheckExact(item))
1210 continue;
1211 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001212 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001213 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 if (text == Py_None) {
1216 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001217 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001219 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001220 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 return text;
1222 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 Py_DECREF(item);
1224 if (rc < 0)
1225 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 }
1227
1228 Py_INCREF(default_value);
1229 return default_value;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.findall
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001247 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001248 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001249
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001250 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001251 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001252 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001253 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001255 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256
1257 out = PyList_New(0);
1258 if (!out)
1259 return NULL;
1260
1261 if (!self->extra)
1262 return out;
1263
1264 for (i = 0; i < self->extra->length; i++) {
1265 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 int rc;
1267 if (!Element_CheckExact(item))
1268 continue;
1269 Py_INCREF(item);
1270 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1271 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1272 Py_DECREF(item);
1273 Py_DECREF(out);
1274 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001276 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 }
1278
1279 return out;
1280}
1281
Serhiy Storchakacb985562015-05-04 15:32:48 +03001282/*[clinic input]
1283_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001284
Serhiy Storchakacb985562015-05-04 15:32:48 +03001285 path: object
1286 namespaces: object = None
1287
1288[clinic start generated code]*/
1289
1290static PyObject *
1291_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1292 PyObject *namespaces)
1293/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1294{
1295 PyObject* tag = path;
1296 _Py_IDENTIFIER(iterfind);
1297 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001298
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001299 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001300 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001301}
1302
Serhiy Storchakacb985562015-05-04 15:32:48 +03001303/*[clinic input]
1304_elementtree.Element.get
1305
1306 key: object
1307 default: object = None
1308
1309[clinic start generated code]*/
1310
1311static PyObject *
1312_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1313 PyObject *default_value)
1314/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315{
1316 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317
1318 if (!self->extra || self->extra->attrib == Py_None)
1319 value = default_value;
1320 else {
1321 value = PyDict_GetItem(self->extra->attrib, key);
1322 if (!value)
1323 value = default_value;
1324 }
1325
1326 Py_INCREF(value);
1327 return value;
1328}
1329
Serhiy Storchakacb985562015-05-04 15:32:48 +03001330/*[clinic input]
1331_elementtree.Element.getchildren
1332
1333[clinic start generated code]*/
1334
1335static PyObject *
1336_elementtree_Element_getchildren_impl(ElementObject *self)
1337/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001339 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340 PyObject* list;
1341
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001342 /* FIXME: report as deprecated? */
1343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344 if (!self->extra)
1345 return PyList_New(0);
1346
1347 list = PyList_New(self->extra->length);
1348 if (!list)
1349 return NULL;
1350
1351 for (i = 0; i < self->extra->length; i++) {
1352 PyObject* item = self->extra->children[i];
1353 Py_INCREF(item);
1354 PyList_SET_ITEM(list, i, item);
1355 }
1356
1357 return list;
1358}
1359
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001360
Eli Bendersky64d11e62012-06-15 07:42:50 +03001361static PyObject *
1362create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1363
1364
Serhiy Storchakacb985562015-05-04 15:32:48 +03001365/*[clinic input]
1366_elementtree.Element.iter
1367
1368 tag: object = None
1369
1370[clinic start generated code]*/
1371
Eli Bendersky64d11e62012-06-15 07:42:50 +03001372static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001373_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1374/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001375{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001376 if (PyUnicode_Check(tag)) {
1377 if (PyUnicode_READY(tag) < 0)
1378 return NULL;
1379 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1380 tag = Py_None;
1381 }
1382 else if (PyBytes_Check(tag)) {
1383 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1384 tag = Py_None;
1385 }
1386
Eli Bendersky64d11e62012-06-15 07:42:50 +03001387 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001388}
1389
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Serhiy Storchakacb985562015-05-04 15:32:48 +03001394[clinic start generated code]*/
1395
1396static PyObject *
1397_elementtree_Element_itertext_impl(ElementObject *self)
1398/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1399{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001400 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401}
1402
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001405element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001406{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001407 ElementObject* self = (ElementObject*) self_;
1408
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409 if (!self->extra || index < 0 || index >= self->extra->length) {
1410 PyErr_SetString(
1411 PyExc_IndexError,
1412 "child index out of range"
1413 );
1414 return NULL;
1415 }
1416
1417 Py_INCREF(self->extra->children[index]);
1418 return self->extra->children[index];
1419}
1420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421/*[clinic input]
1422_elementtree.Element.insert
1423
1424 index: Py_ssize_t
1425 subelement: object(subclass_of='&Element_Type')
1426 /
1427
1428[clinic start generated code]*/
1429
1430static PyObject *
1431_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1432 PyObject *subelement)
1433/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001435 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001436
Victor Stinner5f0af232013-07-11 23:01:36 +02001437 if (!self->extra) {
1438 if (create_extra(self, NULL) < 0)
1439 return NULL;
1440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001442 if (index < 0) {
1443 index += self->extra->length;
1444 if (index < 0)
1445 index = 0;
1446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001447 if (index > self->extra->length)
1448 index = self->extra->length;
1449
1450 if (element_resize(self, 1) < 0)
1451 return NULL;
1452
1453 for (i = self->extra->length; i > index; i--)
1454 self->extra->children[i] = self->extra->children[i-1];
1455
Serhiy Storchakacb985562015-05-04 15:32:48 +03001456 Py_INCREF(subelement);
1457 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458
1459 self->extra->length++;
1460
1461 Py_RETURN_NONE;
1462}
1463
Serhiy Storchakacb985562015-05-04 15:32:48 +03001464/*[clinic input]
1465_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466
Serhiy Storchakacb985562015-05-04 15:32:48 +03001467[clinic start generated code]*/
1468
1469static PyObject *
1470_elementtree_Element_items_impl(ElementObject *self)
1471/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1472{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001473 if (!self->extra || self->extra->attrib == Py_None)
1474 return PyList_New(0);
1475
1476 return PyDict_Items(self->extra->attrib);
1477}
1478
Serhiy Storchakacb985562015-05-04 15:32:48 +03001479/*[clinic input]
1480_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482[clinic start generated code]*/
1483
1484static PyObject *
1485_elementtree_Element_keys_impl(ElementObject *self)
1486/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1487{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488 if (!self->extra || self->extra->attrib == Py_None)
1489 return PyList_New(0);
1490
1491 return PyDict_Keys(self->extra->attrib);
1492}
1493
Martin v. Löwis18e16552006-02-15 17:27:45 +00001494static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495element_length(ElementObject* self)
1496{
1497 if (!self->extra)
1498 return 0;
1499
1500 return self->extra->length;
1501}
1502
Serhiy Storchakacb985562015-05-04 15:32:48 +03001503/*[clinic input]
1504_elementtree.Element.makeelement
1505
1506 tag: object
1507 attrib: object
1508 /
1509
1510[clinic start generated code]*/
1511
1512static PyObject *
1513_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1514 PyObject *attrib)
1515/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516{
1517 PyObject* elem;
1518
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519 attrib = PyDict_Copy(attrib);
1520 if (!attrib)
1521 return NULL;
1522
Eli Bendersky092af1f2012-03-04 07:14:03 +02001523 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524
1525 Py_DECREF(attrib);
1526
1527 return elem;
1528}
1529
Serhiy Storchakacb985562015-05-04 15:32:48 +03001530/*[clinic input]
1531_elementtree.Element.remove
1532
1533 subelement: object(subclass_of='&Element_Type')
1534 /
1535
1536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1540/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001542 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001543 int rc;
1544 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546 if (!self->extra) {
1547 /* element has no children, so raise exception */
1548 PyErr_SetString(
1549 PyExc_ValueError,
1550 "list.remove(x): x not in list"
1551 );
1552 return NULL;
1553 }
1554
1555 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001558 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001559 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001560 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001561 if (rc < 0)
1562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 }
1564
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001565 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001566 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 PyErr_SetString(
1568 PyExc_ValueError,
1569 "list.remove(x): x not in list"
1570 );
1571 return NULL;
1572 }
1573
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001574 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575
1576 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577 for (; i < self->extra->length; i++)
1578 self->extra->children[i] = self->extra->children[i+1];
1579
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001580 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001581 Py_RETURN_NONE;
1582}
1583
1584static PyObject*
1585element_repr(ElementObject* self)
1586{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001587 int status;
1588
1589 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001590 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001591
1592 status = Py_ReprEnter((PyObject *)self);
1593 if (status == 0) {
1594 PyObject *res;
1595 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1596 Py_ReprLeave((PyObject *)self);
1597 return res;
1598 }
1599 if (status > 0)
1600 PyErr_Format(PyExc_RuntimeError,
1601 "reentrant call inside %s.__repr__",
1602 Py_TYPE(self)->tp_name);
1603 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604}
1605
Serhiy Storchakacb985562015-05-04 15:32:48 +03001606/*[clinic input]
1607_elementtree.Element.set
1608
1609 key: object
1610 value: object
1611 /
1612
1613[clinic start generated code]*/
1614
1615static PyObject *
1616_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1617 PyObject *value)
1618/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001619{
1620 PyObject* attrib;
1621
Victor Stinner5f0af232013-07-11 23:01:36 +02001622 if (!self->extra) {
1623 if (create_extra(self, NULL) < 0)
1624 return NULL;
1625 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626
1627 attrib = element_get_attrib(self);
1628 if (!attrib)
1629 return NULL;
1630
1631 if (PyDict_SetItem(attrib, key, value) < 0)
1632 return NULL;
1633
1634 Py_RETURN_NONE;
1635}
1636
1637static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001638element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001639{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001640 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001641 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001642 PyObject* old;
1643
1644 if (!self->extra || index < 0 || index >= self->extra->length) {
1645 PyErr_SetString(
1646 PyExc_IndexError,
1647 "child assignment index out of range");
1648 return -1;
1649 }
1650
1651 old = self->extra->children[index];
1652
1653 if (item) {
1654 Py_INCREF(item);
1655 self->extra->children[index] = item;
1656 } else {
1657 self->extra->length--;
1658 for (i = index; i < self->extra->length; i++)
1659 self->extra->children[i] = self->extra->children[i+1];
1660 }
1661
1662 Py_DECREF(old);
1663
1664 return 0;
1665}
1666
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001667static PyObject*
1668element_subscr(PyObject* self_, PyObject* item)
1669{
1670 ElementObject* self = (ElementObject*) self_;
1671
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001672 if (PyIndex_Check(item)) {
1673 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001674
1675 if (i == -1 && PyErr_Occurred()) {
1676 return NULL;
1677 }
1678 if (i < 0 && self->extra)
1679 i += self->extra->length;
1680 return element_getitem(self_, i);
1681 }
1682 else if (PySlice_Check(item)) {
1683 Py_ssize_t start, stop, step, slicelen, cur, i;
1684 PyObject* list;
1685
1686 if (!self->extra)
1687 return PyList_New(0);
1688
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001689 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001690 self->extra->length,
1691 &start, &stop, &step, &slicelen) < 0) {
1692 return NULL;
1693 }
1694
1695 if (slicelen <= 0)
1696 return PyList_New(0);
1697 else {
1698 list = PyList_New(slicelen);
1699 if (!list)
1700 return NULL;
1701
1702 for (cur = start, i = 0; i < slicelen;
1703 cur += step, i++) {
1704 PyObject* item = self->extra->children[cur];
1705 Py_INCREF(item);
1706 PyList_SET_ITEM(list, i, item);
1707 }
1708
1709 return list;
1710 }
1711 }
1712 else {
1713 PyErr_SetString(PyExc_TypeError,
1714 "element indices must be integers");
1715 return NULL;
1716 }
1717}
1718
1719static int
1720element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1721{
1722 ElementObject* self = (ElementObject*) self_;
1723
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001724 if (PyIndex_Check(item)) {
1725 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726
1727 if (i == -1 && PyErr_Occurred()) {
1728 return -1;
1729 }
1730 if (i < 0 && self->extra)
1731 i += self->extra->length;
1732 return element_setitem(self_, i, value);
1733 }
1734 else if (PySlice_Check(item)) {
1735 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1736
1737 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001738 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001739
Victor Stinner5f0af232013-07-11 23:01:36 +02001740 if (!self->extra) {
1741 if (create_extra(self, NULL) < 0)
1742 return -1;
1743 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001744
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001745 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001746 self->extra->length,
1747 &start, &stop, &step, &slicelen) < 0) {
1748 return -1;
1749 }
1750
Eli Bendersky865756a2012-03-09 13:38:15 +02001751 if (value == NULL) {
1752 /* Delete slice */
1753 size_t cur;
1754 Py_ssize_t i;
1755
1756 if (slicelen <= 0)
1757 return 0;
1758
1759 /* Since we're deleting, the direction of the range doesn't matter,
1760 * so for simplicity make it always ascending.
1761 */
1762 if (step < 0) {
1763 stop = start + 1;
1764 start = stop + step * (slicelen - 1) - 1;
1765 step = -step;
1766 }
1767
1768 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1769
1770 /* recycle is a list that will contain all the children
1771 * scheduled for removal.
1772 */
1773 if (!(recycle = PyList_New(slicelen))) {
1774 PyErr_NoMemory();
1775 return -1;
1776 }
1777
1778 /* This loop walks over all the children that have to be deleted,
1779 * with cur pointing at them. num_moved is the amount of children
1780 * until the next deleted child that have to be "shifted down" to
1781 * occupy the deleted's places.
1782 * Note that in the ith iteration, shifting is done i+i places down
1783 * because i children were already removed.
1784 */
1785 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1786 /* Compute how many children have to be moved, clipping at the
1787 * list end.
1788 */
1789 Py_ssize_t num_moved = step - 1;
1790 if (cur + step >= (size_t)self->extra->length) {
1791 num_moved = self->extra->length - cur - 1;
1792 }
1793
1794 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1795
1796 memmove(
1797 self->extra->children + cur - i,
1798 self->extra->children + cur + 1,
1799 num_moved * sizeof(PyObject *));
1800 }
1801
1802 /* Leftover "tail" after the last removed child */
1803 cur = start + (size_t)slicelen * step;
1804 if (cur < (size_t)self->extra->length) {
1805 memmove(
1806 self->extra->children + cur - slicelen,
1807 self->extra->children + cur,
1808 (self->extra->length - cur) * sizeof(PyObject *));
1809 }
1810
1811 self->extra->length -= slicelen;
1812
1813 /* Discard the recycle list with all the deleted sub-elements */
1814 Py_XDECREF(recycle);
1815 return 0;
1816 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001817
1818 /* A new slice is actually being assigned */
1819 seq = PySequence_Fast(value, "");
1820 if (!seq) {
1821 PyErr_Format(
1822 PyExc_TypeError,
1823 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1824 );
1825 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001826 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001827 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001828
1829 if (step != 1 && newlen != slicelen)
1830 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001831 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001832 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001833 "attempt to assign sequence of size %zd "
1834 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001835 newlen, slicelen
1836 );
1837 return -1;
1838 }
1839
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001840 /* Resize before creating the recycle bin, to prevent refleaks. */
1841 if (newlen > slicelen) {
1842 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001843 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001844 return -1;
1845 }
1846 }
1847
1848 if (slicelen > 0) {
1849 /* to avoid recursive calls to this method (via decref), move
1850 old items to the recycle bin here, and get rid of them when
1851 we're done modifying the element */
1852 recycle = PyList_New(slicelen);
1853 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001854 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001855 return -1;
1856 }
1857 for (cur = start, i = 0; i < slicelen;
1858 cur += step, i++)
1859 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1860 }
1861
1862 if (newlen < slicelen) {
1863 /* delete slice */
1864 for (i = stop; i < self->extra->length; i++)
1865 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1866 } else if (newlen > slicelen) {
1867 /* insert slice */
1868 for (i = self->extra->length-1; i >= stop; i--)
1869 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1870 }
1871
1872 /* replace the slice */
1873 for (cur = start, i = 0; i < newlen;
1874 cur += step, i++) {
1875 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1876 Py_INCREF(element);
1877 self->extra->children[cur] = element;
1878 }
1879
1880 self->extra->length += newlen - slicelen;
1881
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001882 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883
1884 /* discard the recycle bin, and everything in it */
1885 Py_XDECREF(recycle);
1886
1887 return 0;
1888 }
1889 else {
1890 PyErr_SetString(PyExc_TypeError,
1891 "element indices must be integers");
1892 return -1;
1893 }
1894}
1895
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001896static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001897element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001898{
1899 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001900 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001902 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001903 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001904
Alexander Belopolskye239d232010-12-08 23:31:48 +00001905 if (name == NULL)
1906 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001907
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001908 /* handle common attributes first */
1909 if (strcmp(name, "tag") == 0) {
1910 res = self->tag;
1911 Py_INCREF(res);
1912 return res;
1913 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001914 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001915 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001916 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001917 }
1918
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919 /* methods */
1920 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1921 if (res)
1922 return res;
1923
1924 /* less common attributes */
1925 if (strcmp(name, "tail") == 0) {
1926 PyErr_Clear();
1927 res = element_get_tail(self);
1928 } else if (strcmp(name, "attrib") == 0) {
1929 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001930 if (!self->extra) {
1931 if (create_extra(self, NULL) < 0)
1932 return NULL;
1933 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001934 res = element_get_attrib(self);
1935 }
1936
1937 if (!res)
1938 return NULL;
1939
1940 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001941 return res;
1942}
1943
Eli Benderskyef9683b2013-05-18 07:52:34 -07001944static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001945element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001946{
Eli Benderskyb20df952012-05-20 06:33:29 +03001947 char *name = "";
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001948
1949 if (value == NULL) {
1950 PyErr_SetString(PyExc_AttributeError,
1951 "can't delete attribute");
1952 return -1;
1953 }
Eli Benderskyb20df952012-05-20 06:33:29 +03001954 if (PyUnicode_Check(nameobj))
1955 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001956 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001957 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001958
1959 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001960 Py_INCREF(value);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03001961 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962 } else if (strcmp(name, "text") == 0) {
1963 Py_DECREF(JOIN_OBJ(self->text));
1964 self->text = value;
1965 Py_INCREF(self->text);
1966 } else if (strcmp(name, "tail") == 0) {
1967 Py_DECREF(JOIN_OBJ(self->tail));
1968 self->tail = value;
1969 Py_INCREF(self->tail);
1970 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001971 if (!self->extra) {
1972 if (create_extra(self, NULL) < 0)
1973 return -1;
1974 }
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001975 Py_INCREF(value);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03001976 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001977 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001978 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001979 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001980 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001981 }
1982
Eli Benderskyef9683b2013-05-18 07:52:34 -07001983 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001984}
1985
1986static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001987 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001988 0, /* sq_concat */
1989 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001990 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001991 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001992 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001993 0,
1994};
1995
Eli Bendersky64d11e62012-06-15 07:42:50 +03001996/******************************* Element iterator ****************************/
1997
1998/* ElementIterObject represents the iteration state over an XML element in
1999 * pre-order traversal. To keep track of which sub-element should be returned
2000 * next, a stack of parents is maintained. This is a standard stack-based
2001 * iterative pre-order traversal of a tree.
2002 * The stack is managed using a single-linked list starting at parent_stack.
2003 * Each stack node contains the saved parent to which we should return after
2004 * the current one is exhausted, and the next child to examine in that parent.
2005 */
2006typedef struct ParentLocator_t {
2007 ElementObject *parent;
2008 Py_ssize_t child_index;
2009 struct ParentLocator_t *next;
2010} ParentLocator;
2011
/* Iterator object shared by the element and text iteration modes. */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* top of the parent stack; the bottom
                                       node has a NULL parent */
    ElementObject *root_element;    /* element the iteration started from */
    PyObject *sought_tag;           /* tag to match; Py_None matches all */
    int root_done;                  /* set once the root has been handled */
    int gettext;                    /* nonzero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2020
2021
2022static void
2023elementiter_dealloc(ElementIterObject *it)
2024{
2025 ParentLocator *p = it->parent_stack;
2026 while (p) {
2027 ParentLocator *temp = p;
2028 Py_XDECREF(p->parent);
2029 p = p->next;
2030 PyObject_Free(temp);
2031 }
2032
2033 Py_XDECREF(it->sought_tag);
2034 Py_XDECREF(it->root_element);
2035
2036 PyObject_GC_UnTrack(it);
2037 PyObject_GC_Del(it);
2038}
2039
2040static int
2041elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2042{
2043 ParentLocator *p = it->parent_stack;
2044 while (p) {
2045 Py_VISIT(p->parent);
2046 p = p->next;
2047 }
2048
2049 Py_VISIT(it->root_element);
2050 Py_VISIT(it->sought_tag);
2051 return 0;
2052}
2053
2054/* Helper function for elementiter_next. Add a new parent to the parent stack.
2055 */
2056static ParentLocator *
2057parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2058{
2059 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2060 if (new_node) {
2061 new_node->parent = parent;
2062 Py_INCREF(parent);
2063 new_node->child_index = 0;
2064 new_node->next = stack;
2065 }
2066 return new_node;
2067}
2068
2069static PyObject *
2070elementiter_next(ElementIterObject *it)
2071{
2072 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002073 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002074 * A short note on gettext: this function serves both the iter() and
2075 * itertext() methods to avoid code duplication. However, there are a few
2076 * small differences in the way these iterations work. Namely:
2077 * - itertext() only yields text from nodes that have it, and continues
2078 * iterating when a node doesn't have text (so it doesn't return any
2079 * node like iter())
2080 * - itertext() also has to handle tail, after finishing with all the
2081 * children of a node.
2082 */
Eli Bendersky113da642012-06-15 07:52:49 +03002083 ElementObject *cur_parent;
2084 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002085 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002086 ElementObject *elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002087
2088 while (1) {
2089 /* Handle the case reached in the beginning and end of iteration, where
2090 * the parent stack is empty. The root_done flag gives us indication
2091 * whether we've just started iterating (so root_done is 0), in which
2092 * case the root is returned. If root_done is 1 and we're here, the
2093 * iterator is exhausted.
2094 */
2095 if (!it->parent_stack->parent) {
2096 if (it->root_done) {
2097 PyErr_SetNone(PyExc_StopIteration);
2098 return NULL;
2099 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002100 elem = it->root_element;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002102 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002103 if (!it->parent_stack) {
2104 PyErr_NoMemory();
2105 return NULL;
2106 }
2107
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002108 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002109 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002110 rc = (it->sought_tag == Py_None);
2111 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002112 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002113 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002114 if (rc < 0) {
2115 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002116 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002117 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002118 }
2119 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002120 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002121 PyObject *text = element_get_text(elem);
2122 if (!text) {
2123 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002124 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002125 }
2126 Py_INCREF(text);
2127 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002128 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002129 if (rc > 0)
2130 return text;
2131 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002132 if (rc < 0)
2133 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002134 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002135 return (PyObject *)elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002136 }
2137 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002138 else {
2139 Py_DECREF(elem);
2140 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002141 }
2142 }
2143
2144 /* See if there are children left to traverse in the current parent. If
2145 * yes, visit the next child. If not, pop the stack and try again.
2146 */
Eli Bendersky113da642012-06-15 07:52:49 +03002147 cur_parent = it->parent_stack->parent;
2148 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 if (cur_parent->extra && child_index < cur_parent->extra->length) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002150 elem = (ElementObject *)cur_parent->extra->children[child_index];
Eli Bendersky64d11e62012-06-15 07:42:50 +03002151 it->parent_stack->child_index++;
2152 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002153 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002154 if (!it->parent_stack) {
2155 PyErr_NoMemory();
2156 return NULL;
2157 }
2158
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002159 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002161 PyObject *text = element_get_text(elem);
2162 if (!text) {
2163 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002164 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002165 }
2166 Py_INCREF(text);
2167 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002168 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002169 if (rc > 0)
2170 return text;
2171 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002172 if (rc < 0)
2173 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002174 } else {
2175 rc = (it->sought_tag == Py_None);
2176 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002177 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002178 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002179 if (rc < 0) {
2180 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002181 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002182 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002183 }
2184 if (rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002185 return (PyObject *)elem;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002186 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002187 Py_DECREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002189 }
2190 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002191 PyObject *tail;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002192 ParentLocator *next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002193 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002194 Py_INCREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002195 tail = element_get_tail(cur_parent);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002196 if (!tail) {
2197 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002198 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002199 }
2200 Py_INCREF(tail);
2201 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002202 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002203 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002204 tail = Py_None;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002205 Py_INCREF(tail);
2206 }
2207 next = it->parent_stack->next;
2208 cur_parent = it->parent_stack->parent;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002209 PyObject_Free(it->parent_stack);
2210 it->parent_stack = next;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002211 Py_XDECREF(cur_parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002212
2213 /* Note that extra condition on it->parent_stack->parent here;
2214 * this is because itertext() is supposed to only return *inner*
2215 * text, not text following the element it began iteration with.
2216 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002217 if (it->parent_stack->parent) {
2218 rc = PyObject_IsTrue(tail);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002219 if (rc > 0)
2220 return tail;
2221 Py_DECREF(tail);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002222 if (rc < 0)
2223 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002224 }
2225 else {
2226 Py_DECREF(tail);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002227 }
2228 }
2229 }
2230
2231 return NULL;
2232}
2233
2234
/* Type object of the element iterator.  It is a GC-enabled type that only
   fills in the deallocation, traversal and iteration slots; every other
   slot is left empty. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2278
2279
2280static PyObject *
2281create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2282{
2283 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002284
2285 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2286 if (!it)
2287 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002288
Victor Stinner4d463432013-07-11 23:05:03 +02002289 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002290 it->sought_tag = tag;
2291 it->root_done = 0;
2292 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002293 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 it->root_element = self;
2295
Eli Bendersky64d11e62012-06-15 07:42:50 +03002296 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002297
2298 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2299 if (it->parent_stack == NULL) {
2300 Py_DECREF(it);
2301 PyErr_NoMemory();
2302 return NULL;
2303 }
2304 it->parent_stack->parent = NULL;
2305 it->parent_stack->child_index = 0;
2306 it->parent_stack->next = NULL;
2307
Eli Bendersky64d11e62012-06-15 07:42:50 +03002308 return (PyObject *)it;
2309}
2310
2311
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002312/* ==================================================================== */
2313/* the tree builder type */
2314
/* State of a tree builder: the tree built so far, the position in it, any
   pending character data and the optional event recording set-up. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    PyObject *element_factory; /* callable used to create nodes, or NULL */

    /* element tracing */
    PyObject *events; /* list of events, or NULL if not collecting */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2337
/* True when op is an instance of exactly TreeBuilder_Type (instances of
   subclasses do not match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339
2340/* -------------------------------------------------------------------- */
2341/* constructor and destructor */
2342
Eli Bendersky58d548d2012-05-29 15:45:16 +03002343static PyObject *
2344treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002346 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2347 if (t != NULL) {
2348 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002349
Eli Bendersky58d548d2012-05-29 15:45:16 +03002350 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002351 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002352 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002353 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002356 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002357 t->stack = PyList_New(20);
2358 if (!t->stack) {
2359 Py_DECREF(t->this);
2360 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002361 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002362 return NULL;
2363 }
2364 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002365
Eli Bendersky58d548d2012-05-29 15:45:16 +03002366 t->events = NULL;
2367 t->start_event_obj = t->end_event_obj = NULL;
2368 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2369 }
2370 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002371}
2372
Serhiy Storchakacb985562015-05-04 15:32:48 +03002373/*[clinic input]
2374_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002375
Serhiy Storchakacb985562015-05-04 15:32:48 +03002376 element_factory: object = NULL
2377
2378[clinic start generated code]*/
2379
2380static int
2381_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2382 PyObject *element_factory)
2383/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2384{
2385 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386
2387 if (element_factory) {
2388 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002389 tmp = self->element_factory;
2390 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002391 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002392 }
2393
Eli Bendersky58d548d2012-05-29 15:45:16 +03002394 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395}
2396
Eli Bendersky48d358b2012-05-30 17:57:50 +03002397static int
2398treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2399{
2400 Py_VISIT(self->root);
2401 Py_VISIT(self->this);
2402 Py_VISIT(self->last);
2403 Py_VISIT(self->data);
2404 Py_VISIT(self->stack);
2405 Py_VISIT(self->element_factory);
2406 return 0;
2407}
2408
/* Release every object reference owned by the builder and reset the
 * fields to NULL.  Py_CLEAR tolerates fields that are already NULL, so
 * calling this more than once is harmless.  Also used by
 * treebuilder_dealloc.  Always returns 0.
 */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002425
Eli Bendersky48d358b2012-05-30 17:57:50 +03002426static void
2427treebuilder_dealloc(TreeBuilderObject *self)
2428{
2429 PyObject_GC_UnTrack(self);
2430 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002431 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002432}
2433
2434/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002435/* helpers for handling of arbitrary element-like objects */
2436
2437static int
2438treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2439 PyObject **dest, _Py_Identifier *name)
2440{
2441 if (Element_CheckExact(element)) {
2442 Py_DECREF(JOIN_OBJ(*dest));
2443 *dest = JOIN_SET(data, PyList_CheckExact(data));
2444 return 0;
2445 }
2446 else {
2447 PyObject *joined = list_join(data);
2448 int r;
2449 if (joined == NULL)
2450 return -1;
2451 r = _PyObject_SetAttrId(element, name, joined);
2452 Py_DECREF(joined);
2453 return r;
2454 }
2455}
2456
2457/* These two functions steal a reference to data */
2458static int
2459treebuilder_set_element_text(PyObject *element, PyObject *data)
2460{
2461 _Py_IDENTIFIER(text);
2462 return treebuilder_set_element_text_or_tail(
2463 element, data, &((ElementObject *) element)->text, &PyId_text);
2464}
2465
2466static int
2467treebuilder_set_element_tail(PyObject *element, PyObject *data)
2468{
2469 _Py_IDENTIFIER(tail);
2470 return treebuilder_set_element_text_or_tail(
2471 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2472}
2473
2474static int
2475treebuilder_add_subelement(PyObject *element, PyObject *child)
2476{
2477 _Py_IDENTIFIER(append);
2478 if (Element_CheckExact(element)) {
2479 ElementObject *elem = (ElementObject *) element;
2480 return element_add_subelement(elem, child);
2481 }
2482 else {
2483 PyObject *res;
2484 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2485 if (res == NULL)
2486 return -1;
2487 Py_DECREF(res);
2488 return 0;
2489 }
2490}
2491
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002492LOCAL(int)
2493treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2494 PyObject *node)
2495{
2496 if (action != NULL) {
2497 PyObject *res = PyTuple_Pack(2, action, node);
2498 if (res == NULL)
2499 return -1;
2500 if (PyList_Append(self->events, res) < 0) {
2501 Py_DECREF(res);
2502 return -1;
2503 }
2504 Py_DECREF(res);
2505 }
2506 return 0;
2507}
2508
Antoine Pitrouee329312012-10-04 19:53:29 +02002509/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002510/* handlers */
2511
2512LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2514 PyObject* attrib)
2515{
2516 PyObject* node;
2517 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002518 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519
2520 if (self->data) {
2521 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002522 if (treebuilder_set_element_text(self->last, self->data))
2523 return NULL;
2524 }
2525 else {
2526 if (treebuilder_set_element_tail(self->last, self->data))
2527 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002528 }
2529 self->data = NULL;
2530 }
2531
Eli Bendersky08231a92013-05-18 15:47:16 -07002532 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002533 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2534 } else {
2535 node = create_new_element(tag, attrib);
2536 }
2537 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002539 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540
Antoine Pitrouee329312012-10-04 19:53:29 +02002541 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542
2543 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002544 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002545 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546 } else {
2547 if (self->root) {
2548 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002549 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 "multiple elements on top level"
2551 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002552 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 }
2554 Py_INCREF(node);
2555 self->root = node;
2556 }
2557
2558 if (self->index < PyList_GET_SIZE(self->stack)) {
2559 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002560 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 Py_INCREF(this);
2562 } else {
2563 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002564 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002565 }
2566 self->index++;
2567
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002569 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002571 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002572
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002573 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2574 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002575
2576 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002577
2578 error:
2579 Py_DECREF(node);
2580 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002581}
2582
2583LOCAL(PyObject*)
2584treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2585{
2586 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002587 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002588 /* ignore calls to data before the first call to start */
2589 Py_RETURN_NONE;
2590 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591 /* store the first item as is */
2592 Py_INCREF(data); self->data = data;
2593 } else {
2594 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002595 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2596 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002597 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002598 /* expat often generates single character data sections; handle
2599 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002600 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2601 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002603 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 } else if (PyList_CheckExact(self->data)) {
2605 if (PyList_Append(self->data, data) < 0)
2606 return NULL;
2607 } else {
2608 PyObject* list = PyList_New(2);
2609 if (!list)
2610 return NULL;
2611 PyList_SET_ITEM(list, 0, self->data);
2612 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2613 self->data = list;
2614 }
2615 }
2616
2617 Py_RETURN_NONE;
2618}
2619
2620LOCAL(PyObject*)
2621treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2622{
2623 PyObject* item;
2624
2625 if (self->data) {
2626 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002627 if (treebuilder_set_element_text(self->last, self->data))
2628 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002629 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002630 if (treebuilder_set_element_tail(self->last, self->data))
2631 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002632 }
2633 self->data = NULL;
2634 }
2635
2636 if (self->index == 0) {
2637 PyErr_SetString(
2638 PyExc_IndexError,
2639 "pop from empty stack"
2640 );
2641 return NULL;
2642 }
2643
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002644 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002645 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002646 self->index--;
2647 self->this = PyList_GET_ITEM(self->stack, self->index);
2648 Py_INCREF(self->this);
2649 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002651 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2652 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653
2654 Py_INCREF(self->last);
2655 return (PyObject*) self->last;
2656}
2657
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658/* -------------------------------------------------------------------- */
2659/* methods (in alphabetical order) */
2660
Serhiy Storchakacb985562015-05-04 15:32:48 +03002661/*[clinic input]
2662_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663
Serhiy Storchakacb985562015-05-04 15:32:48 +03002664 data: object
2665 /
2666
2667[clinic start generated code]*/
2668
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* TreeBuilder.data(data): forward the argument unchanged to the
       internal handler and return its result (None, or NULL on error). */
    return treebuilder_handle_data(self, data);
}
2675
Serhiy Storchakacb985562015-05-04 15:32:48 +03002676/*[clinic input]
2677_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678
Serhiy Storchakacb985562015-05-04 15:32:48 +03002679 tag: object
2680 /
2681
2682[clinic start generated code]*/
2683
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* TreeBuilder.end(tag): forward to the internal handler, which returns
       a new reference to the closed element (or NULL on error). */
    return treebuilder_handle_end(self, tag);
}
2690
2691LOCAL(PyObject*)
2692treebuilder_done(TreeBuilderObject* self)
2693{
2694 PyObject* res;
2695
2696 /* FIXME: check stack size? */
2697
2698 if (self->root)
2699 res = self->root;
2700 else
2701 res = Py_None;
2702
2703 Py_INCREF(res);
2704 return res;
2705}
2706
Serhiy Storchakacb985562015-05-04 15:32:48 +03002707/*[clinic input]
2708_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709
Serhiy Storchakacb985562015-05-04 15:32:48 +03002710[clinic start generated code]*/
2711
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* TreeBuilder.close(): return the root element (or None) via
       treebuilder_done(). */
    return treebuilder_done(self);
}
2718
Serhiy Storchakacb985562015-05-04 15:32:48 +03002719/*[clinic input]
2720_elementtree.TreeBuilder.start
2721
2722 tag: object
2723 attrs: object = None
2724 /
2725
2726[clinic start generated code]*/
2727
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
{
    /* TreeBuilder.start(tag, attrs): forward both arguments unchanged to
       the internal handler, which returns a new reference to the created
       element (or NULL on error). */
    return treebuilder_handle_start(self, tag, attrs);
}
2735
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736/* ==================================================================== */
2737/* the expat interface */
2738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002740#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002741
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* EXPAT(func) expands to the `func` member of the cached dispatch table. */
#define EXPAT(func) (expat_capi->func)

/* Allocator triple passed to ParserCreate_MM when an XMLParser is
   initialised, so that expat allocates through the Python object
   allocator functions listed here. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2750
/* Instance layout of XMLParser objects. */
typedef struct {
    PyObject_HEAD

    /* expat parser handle, created by ParserCreate_MM in __init__ */
    XML_Parser parser;

    /* object that receives the parse events (a TreeBuilder is created in
       __init__ when the caller supplies none) */
    PyObject *target;
    /* dict consulted by expat_default_handler to resolve "&name;" */
    PyObject *entity;

    /* dict used by makeuniversal as a cache: raw UTF-8 name (bytes) ->
       decoded universal name (str) */
    PyObject *names;

    /* attributes "start", "data" and "end" fetched from the target in
       __init__; NULL when the target has no such attribute */
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    /* attributes "comment", "pi" and "doctype" of the target, same
       convention as above */
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* attribute "close" of the target, same convention as above */
    PyObject *handle_close;

} XMLParserObject;
2772
/* Forward declarations: expat_start_doctype_handler refers to both the
   XMLParser.doctype method wrapper (to recognise the built-in method) and
   its implementation (to call it). */
static PyObject*
_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
static PyObject *
_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
                                    PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002778
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779/* helpers */
2780
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002781LOCAL(PyObject*)
2782makeuniversal(XMLParserObject* self, const char* string)
2783{
2784 /* convert a UTF-8 tag/attribute name from the expat parser
2785 to a universal name string */
2786
Antoine Pitrouc1948842012-10-01 23:40:37 +02002787 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 PyObject* key;
2789 PyObject* value;
2790
2791 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002792 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793 if (!key)
2794 return NULL;
2795
2796 value = PyDict_GetItem(self->names, key);
2797
2798 if (value) {
2799 Py_INCREF(value);
2800 } else {
2801 /* new name. convert to universal name, and decode as
2802 necessary */
2803
2804 PyObject* tag;
2805 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002806 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002807
2808 /* look for namespace separator */
2809 for (i = 0; i < size; i++)
2810 if (string[i] == '}')
2811 break;
2812 if (i != size) {
2813 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002814 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002815 if (tag == NULL) {
2816 Py_DECREF(key);
2817 return NULL;
2818 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002819 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002820 p[0] = '{';
2821 memcpy(p+1, string, size);
2822 size++;
2823 } else {
2824 /* plain name; use key as tag */
2825 Py_INCREF(key);
2826 tag = key;
2827 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002828
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002830 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002831 value = PyUnicode_DecodeUTF8(p, size, "strict");
2832 Py_DECREF(tag);
2833 if (!value) {
2834 Py_DECREF(key);
2835 return NULL;
2836 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002837
2838 /* add to names dictionary */
2839 if (PyDict_SetItem(self->names, key, value) < 0) {
2840 Py_DECREF(key);
2841 Py_DECREF(value);
2842 return NULL;
2843 }
2844 }
2845
2846 Py_DECREF(key);
2847 return value;
2848}
2849
Eli Bendersky5b77d812012-03-16 08:20:05 +02002850/* Set the ParseError exception with the given parameters.
2851 * If message is not NULL, it's used as the error string. Otherwise, the
2852 * message string is the default for the given error_code.
2853*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002854static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002855expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2856 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002857{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002858 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002859 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002861 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002862 message ? message : EXPAT(ErrorString)(error_code),
2863 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002864 if (errmsg == NULL)
2865 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866
Eli Bendersky532d03e2013-08-10 08:00:39 -07002867 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002868 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002869 if (!error)
2870 return;
2871
Eli Bendersky5b77d812012-03-16 08:20:05 +02002872 /* Add code and position attributes */
2873 code = PyLong_FromLong((long)error_code);
2874 if (!code) {
2875 Py_DECREF(error);
2876 return;
2877 }
2878 if (PyObject_SetAttrString(error, "code", code) == -1) {
2879 Py_DECREF(error);
2880 Py_DECREF(code);
2881 return;
2882 }
2883 Py_DECREF(code);
2884
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002885 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002886 if (!position) {
2887 Py_DECREF(error);
2888 return;
2889 }
2890 if (PyObject_SetAttrString(error, "position", position) == -1) {
2891 Py_DECREF(error);
2892 Py_DECREF(position);
2893 return;
2894 }
2895 Py_DECREF(position);
2896
Eli Bendersky532d03e2013-08-10 08:00:39 -07002897 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002898 Py_DECREF(error);
2899}
2900
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901/* -------------------------------------------------------------------- */
2902/* handlers */
2903
2904static void
2905expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2906 int data_len)
2907{
2908 PyObject* key;
2909 PyObject* value;
2910 PyObject* res;
2911
2912 if (data_len < 2 || data_in[0] != '&')
2913 return;
2914
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002915 if (PyErr_Occurred())
2916 return;
2917
Neal Norwitz0269b912007-08-08 06:56:02 +00002918 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919 if (!key)
2920 return;
2921
2922 value = PyDict_GetItem(self->entity, key);
2923
2924 if (value) {
2925 if (TreeBuilder_CheckExact(self->target))
2926 res = treebuilder_handle_data(
2927 (TreeBuilderObject*) self->target, value
2928 );
2929 else if (self->handle_data)
2930 res = PyObject_CallFunction(self->handle_data, "O", value);
2931 else
2932 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002933 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002934 } else if (!PyErr_Occurred()) {
2935 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002936 char message[128] = "undefined entity ";
2937 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002938 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002939 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002940 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002941 EXPAT(GetErrorColumnNumber)(self->parser),
2942 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002943 );
2944 }
2945
2946 Py_DECREF(key);
2947}
2948
2949static void
2950expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2951 const XML_Char **attrib_in)
2952{
2953 PyObject* res;
2954 PyObject* tag;
2955 PyObject* attrib;
2956 int ok;
2957
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002958 if (PyErr_Occurred())
2959 return;
2960
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961 /* tag name */
2962 tag = makeuniversal(self, tag_in);
2963 if (!tag)
2964 return; /* parser will look for errors */
2965
2966 /* attributes */
2967 if (attrib_in[0]) {
2968 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002969 if (!attrib) {
2970 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002972 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973 while (attrib_in[0] && attrib_in[1]) {
2974 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002975 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 if (!key || !value) {
2977 Py_XDECREF(value);
2978 Py_XDECREF(key);
2979 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002980 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002981 return;
2982 }
2983 ok = PyDict_SetItem(attrib, key, value);
2984 Py_DECREF(value);
2985 Py_DECREF(key);
2986 if (ok < 0) {
2987 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002988 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989 return;
2990 }
2991 attrib_in += 2;
2992 }
2993 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002994 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002995 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002996 if (!attrib) {
2997 Py_DECREF(tag);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002998 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002999 }
Eli Bendersky48d358b2012-05-30 17:57:50 +03003000 }
3001
3002 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003 /* shortcut */
3004 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3005 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003006 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003007 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003009 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010 res = NULL;
3011
3012 Py_DECREF(tag);
3013 Py_DECREF(attrib);
3014
3015 Py_XDECREF(res);
3016}
3017
3018static void
3019expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3020 int data_len)
3021{
3022 PyObject* data;
3023 PyObject* res;
3024
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003025 if (PyErr_Occurred())
3026 return;
3027
Neal Norwitz0269b912007-08-08 06:56:02 +00003028 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003029 if (!data)
3030 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003031
3032 if (TreeBuilder_CheckExact(self->target))
3033 /* shortcut */
3034 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3035 else if (self->handle_data)
3036 res = PyObject_CallFunction(self->handle_data, "O", data);
3037 else
3038 res = NULL;
3039
3040 Py_DECREF(data);
3041
3042 Py_XDECREF(res);
3043}
3044
3045static void
3046expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3047{
3048 PyObject* tag;
3049 PyObject* res = NULL;
3050
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003051 if (PyErr_Occurred())
3052 return;
3053
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003054 if (TreeBuilder_CheckExact(self->target))
3055 /* shortcut */
3056 /* the standard tree builder doesn't look at the end tag */
3057 res = treebuilder_handle_end(
3058 (TreeBuilderObject*) self->target, Py_None
3059 );
3060 else if (self->handle_end) {
3061 tag = makeuniversal(self, tag_in);
3062 if (tag) {
3063 res = PyObject_CallFunction(self->handle_end, "O", tag);
3064 Py_DECREF(tag);
3065 }
3066 }
3067
3068 Py_XDECREF(res);
3069}
3070
3071static void
3072expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3073 const XML_Char *uri)
3074{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003075 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3076 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003077
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003078 if (PyErr_Occurred())
3079 return;
3080
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003081 if (!target->events || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003082 return;
3083
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003084 if (!uri)
3085 uri = "";
3086 if (!prefix)
3087 prefix = "";
3088
3089 parcel = Py_BuildValue("ss", prefix, uri);
3090 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003091 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003092 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3093 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003094}
3095
3096static void
3097expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3098{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003099 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3100
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003101 if (PyErr_Occurred())
3102 return;
3103
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003104 if (!target->events)
3105 return;
3106
3107 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003108}
3109
3110static void
3111expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3112{
3113 PyObject* comment;
3114 PyObject* res;
3115
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003116 if (PyErr_Occurred())
3117 return;
3118
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003120 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003121 if (comment) {
3122 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3123 Py_XDECREF(res);
3124 Py_DECREF(comment);
3125 }
3126 }
3127}
3128
Eli Bendersky45839902013-01-13 05:14:47 -08003129static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003130expat_start_doctype_handler(XMLParserObject *self,
3131 const XML_Char *doctype_name,
3132 const XML_Char *sysid,
3133 const XML_Char *pubid,
3134 int has_internal_subset)
3135{
3136 PyObject *self_pyobj = (PyObject *)self;
3137 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3138 PyObject *parser_doctype = NULL;
3139 PyObject *res = NULL;
3140
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003141 if (PyErr_Occurred())
3142 return;
3143
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003144 doctype_name_obj = makeuniversal(self, doctype_name);
3145 if (!doctype_name_obj)
3146 return;
3147
3148 if (sysid) {
3149 sysid_obj = makeuniversal(self, sysid);
3150 if (!sysid_obj) {
3151 Py_DECREF(doctype_name_obj);
3152 return;
3153 }
3154 } else {
3155 Py_INCREF(Py_None);
3156 sysid_obj = Py_None;
3157 }
3158
3159 if (pubid) {
3160 pubid_obj = makeuniversal(self, pubid);
3161 if (!pubid_obj) {
3162 Py_DECREF(doctype_name_obj);
3163 Py_DECREF(sysid_obj);
3164 return;
3165 }
3166 } else {
3167 Py_INCREF(Py_None);
3168 pubid_obj = Py_None;
3169 }
3170
3171 /* If the target has a handler for doctype, call it. */
3172 if (self->handle_doctype) {
3173 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3174 doctype_name_obj, pubid_obj, sysid_obj);
3175 Py_CLEAR(res);
3176 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003177 else {
3178 /* Now see if the parser itself has a doctype method. If yes and it's
3179 * a custom method, call it but warn about deprecation. If it's only
3180 * the vanilla XMLParser method, do nothing.
3181 */
3182 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3183 if (parser_doctype &&
3184 !(PyCFunction_Check(parser_doctype) &&
3185 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3186 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003187 (PyCFunction) _elementtree_XMLParser_doctype)) {
3188 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3189 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003190 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003191 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003192 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003193 res = PyObject_CallFunction(parser_doctype, "OOO",
3194 doctype_name_obj, pubid_obj, sysid_obj);
3195 Py_CLEAR(res);
3196 }
3197 }
3198
3199clear:
3200 Py_XDECREF(parser_doctype);
3201 Py_DECREF(doctype_name_obj);
3202 Py_DECREF(pubid_obj);
3203 Py_DECREF(sysid_obj);
3204}
3205
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003206static void
3207expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3208 const XML_Char* data_in)
3209{
3210 PyObject* target;
3211 PyObject* data;
3212 PyObject* res;
3213
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003214 if (PyErr_Occurred())
3215 return;
3216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003218 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3219 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220 if (target && data) {
3221 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3222 Py_XDECREF(res);
3223 Py_DECREF(data);
3224 Py_DECREF(target);
3225 } else {
3226 Py_XDECREF(data);
3227 Py_XDECREF(target);
3228 }
3229 }
3230}
3231
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233
Eli Bendersky52467b12012-06-01 07:13:08 +03003234static PyObject *
3235xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003236{
Eli Bendersky52467b12012-06-01 07:13:08 +03003237 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3238 if (self) {
3239 self->parser = NULL;
3240 self->target = self->entity = self->names = NULL;
3241 self->handle_start = self->handle_data = self->handle_end = NULL;
3242 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003243 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003245 return (PyObject *)self;
3246}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003247
Serhiy Storchakacb985562015-05-04 15:32:48 +03003248/*[clinic input]
3249_elementtree.XMLParser.__init__
3250
3251 html: object = NULL
3252 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003253 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003254
3255[clinic start generated code]*/
3256
Eli Bendersky52467b12012-06-01 07:13:08 +03003257static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003258_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3259 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003260/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003261{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003262 self->entity = PyDict_New();
3263 if (!self->entity)
3264 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266 self->names = PyDict_New();
3267 if (!self->names) {
3268 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003269 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003270 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003271
Serhiy Storchakacb985562015-05-04 15:32:48 +03003272 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3273 if (!self->parser) {
3274 Py_CLEAR(self->entity);
3275 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003277 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003278 }
3279
Eli Bendersky52467b12012-06-01 07:13:08 +03003280 if (target) {
3281 Py_INCREF(target);
3282 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003283 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003285 Py_CLEAR(self->entity);
3286 Py_CLEAR(self->names);
3287 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003288 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003290 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->handle_start = PyObject_GetAttrString(target, "start");
3294 self->handle_data = PyObject_GetAttrString(target, "data");
3295 self->handle_end = PyObject_GetAttrString(target, "end");
3296 self->handle_comment = PyObject_GetAttrString(target, "comment");
3297 self->handle_pi = PyObject_GetAttrString(target, "pi");
3298 self->handle_close = PyObject_GetAttrString(target, "close");
3299 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300
3301 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003302
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003306 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 (XML_StartElementHandler) expat_start_handler,
3308 (XML_EndElementHandler) expat_end_handler
3309 );
3310 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003311 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003312 (XML_DefaultHandler) expat_default_handler
3313 );
3314 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003315 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003316 (XML_CharacterDataHandler) expat_data_handler
3317 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321 (XML_CommentHandler) expat_comment_handler
3322 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003323 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003325 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003326 (XML_ProcessingInstructionHandler) expat_pi_handler
3327 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003328 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003329 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003330 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3331 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003332 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003333 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003334 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003335 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336
Eli Bendersky52467b12012-06-01 07:13:08 +03003337 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338}
3339
Eli Bendersky52467b12012-06-01 07:13:08 +03003340static int
3341xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3342{
3343 Py_VISIT(self->handle_close);
3344 Py_VISIT(self->handle_pi);
3345 Py_VISIT(self->handle_comment);
3346 Py_VISIT(self->handle_end);
3347 Py_VISIT(self->handle_data);
3348 Py_VISIT(self->handle_start);
3349
3350 Py_VISIT(self->target);
3351 Py_VISIT(self->entity);
3352 Py_VISIT(self->names);
3353
3354 return 0;
3355}
3356
3357static int
3358xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359{
3360 EXPAT(ParserFree)(self->parser);
3361
Antoine Pitrouc1948842012-10-01 23:40:37 +02003362 Py_CLEAR(self->handle_close);
3363 Py_CLEAR(self->handle_pi);
3364 Py_CLEAR(self->handle_comment);
3365 Py_CLEAR(self->handle_end);
3366 Py_CLEAR(self->handle_data);
3367 Py_CLEAR(self->handle_start);
3368 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369
Antoine Pitrouc1948842012-10-01 23:40:37 +02003370 Py_CLEAR(self->target);
3371 Py_CLEAR(self->entity);
3372 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373
Eli Bendersky52467b12012-06-01 07:13:08 +03003374 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003375}
3376
Eli Bendersky52467b12012-06-01 07:13:08 +03003377static void
3378xmlparser_dealloc(XMLParserObject* self)
3379{
3380 PyObject_GC_UnTrack(self);
3381 xmlparser_gc_clear(self);
3382 Py_TYPE(self)->tp_free((PyObject *)self);
3383}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384
3385LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003386expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387{
3388 int ok;
3389
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003390 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003391 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3392
3393 if (PyErr_Occurred())
3394 return NULL;
3395
3396 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003397 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003398 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003400 EXPAT(GetErrorColumnNumber)(self->parser),
3401 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402 );
3403 return NULL;
3404 }
3405
3406 Py_RETURN_NONE;
3407}
3408
Serhiy Storchakacb985562015-05-04 15:32:48 +03003409/*[clinic input]
3410_elementtree.XMLParser.close
3411
3412[clinic start generated code]*/
3413
3414static PyObject *
3415_elementtree_XMLParser_close_impl(XMLParserObject *self)
3416/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003417{
3418 /* end feeding data to parser */
3419
3420 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003422 if (!res)
3423 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003424
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003425 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003426 Py_DECREF(res);
3427 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003428 }
3429 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003430 Py_DECREF(res);
3431 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003432 }
3433 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003434 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003435 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003436}
3437
Serhiy Storchakacb985562015-05-04 15:32:48 +03003438/*[clinic input]
3439_elementtree.XMLParser.feed
3440
3441 data: object
3442 /
3443
3444[clinic start generated code]*/
3445
3446static PyObject *
3447_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3448/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003449{
3450 /* feed data to parser */
3451
Serhiy Storchakacb985562015-05-04 15:32:48 +03003452 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003453 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003454 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3455 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003456 return NULL;
3457 if (data_len > INT_MAX) {
3458 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3459 return NULL;
3460 }
3461 /* Explicitly set UTF-8 encoding. Return code ignored. */
3462 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003463 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003464 }
3465 else {
3466 Py_buffer view;
3467 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003468 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003469 return NULL;
3470 if (view.len > INT_MAX) {
3471 PyBuffer_Release(&view);
3472 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3473 return NULL;
3474 }
3475 res = expat_parse(self, view.buf, (int)view.len, 0);
3476 PyBuffer_Release(&view);
3477 return res;
3478 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479}
3480
Serhiy Storchakacb985562015-05-04 15:32:48 +03003481/*[clinic input]
3482_elementtree.XMLParser._parse_whole
3483
3484 file: object
3485 /
3486
3487[clinic start generated code]*/
3488
3489static PyObject *
3490_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3491/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003492{
Eli Benderskya3699232013-05-19 18:47:23 -07003493 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003494 PyObject* reader;
3495 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003496 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003497 PyObject* res;
3498
Serhiy Storchakacb985562015-05-04 15:32:48 +03003499 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003500 if (!reader)
3501 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003502
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003503 /* read from open file object */
3504 for (;;) {
3505
3506 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3507
3508 if (!buffer) {
3509 /* read failed (e.g. due to KeyboardInterrupt) */
3510 Py_DECREF(reader);
3511 return NULL;
3512 }
3513
Eli Benderskyf996e772012-03-16 05:53:30 +02003514 if (PyUnicode_CheckExact(buffer)) {
3515 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003516 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003517 Py_DECREF(buffer);
3518 break;
3519 }
3520 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003521 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003522 if (!temp) {
3523 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003524 Py_DECREF(reader);
3525 return NULL;
3526 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003527 buffer = temp;
3528 }
3529 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003530 Py_DECREF(buffer);
3531 break;
3532 }
3533
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003534 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3535 Py_DECREF(buffer);
3536 Py_DECREF(reader);
3537 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3538 return NULL;
3539 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003541 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003542 );
3543
3544 Py_DECREF(buffer);
3545
3546 if (!res) {
3547 Py_DECREF(reader);
3548 return NULL;
3549 }
3550 Py_DECREF(res);
3551
3552 }
3553
3554 Py_DECREF(reader);
3555
3556 res = expat_parse(self, "", 0, 1);
3557
3558 if (res && TreeBuilder_CheckExact(self->target)) {
3559 Py_DECREF(res);
3560 return treebuilder_done((TreeBuilderObject*) self->target);
3561 }
3562
3563 return res;
3564}
3565
Serhiy Storchakacb985562015-05-04 15:32:48 +03003566/*[clinic input]
3567_elementtree.XMLParser.doctype
3568
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003569 name: object
3570 pubid: object
3571 system: object
3572 /
3573
Serhiy Storchakacb985562015-05-04 15:32:48 +03003574[clinic start generated code]*/
3575
3576static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003577_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3578 PyObject *pubid, PyObject *system)
3579/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003580{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003581 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3582 "This method of XMLParser is deprecated. Define"
3583 " doctype() method on the TreeBuilder target.",
3584 1) < 0) {
3585 return NULL;
3586 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003587 Py_RETURN_NONE;
3588}
3589
Serhiy Storchakacb985562015-05-04 15:32:48 +03003590/*[clinic input]
3591_elementtree.XMLParser._setevents
3592
3593 events_queue: object(subclass_of='&PyList_Type')
3594 events_to_report: object = None
3595 /
3596
3597[clinic start generated code]*/
3598
3599static PyObject *
3600_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3601 PyObject *events_queue,
3602 PyObject *events_to_report)
3603/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604{
3605 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003606 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003607 TreeBuilderObject *target;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003608 PyObject *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609
3610 if (!TreeBuilder_CheckExact(self->target)) {
3611 PyErr_SetString(
3612 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003613 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614 "targets"
3615 );
3616 return NULL;
3617 }
3618
3619 target = (TreeBuilderObject*) self->target;
3620
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003621 Py_INCREF(events_queue);
Serhiy Storchaka48842712016-04-06 09:45:48 +03003622 Py_XSETREF(target->events, events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003623
3624 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003625 Py_CLEAR(target->start_event_obj);
3626 Py_CLEAR(target->end_event_obj);
3627 Py_CLEAR(target->start_ns_event_obj);
3628 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003630 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003632 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 Py_RETURN_NONE;
3634 }
3635
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003636 if (!(events_seq = PySequence_Fast(events_to_report,
3637 "events must be a sequence"))) {
3638 return NULL;
3639 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003640
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003641 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003642 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3643 char *event_name = NULL;
3644 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003645 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003646 } else if (PyBytes_Check(event_name_obj)) {
3647 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003648 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003649 if (event_name == NULL) {
3650 Py_DECREF(events_seq);
3651 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3652 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003653 }
3654
3655 Py_INCREF(event_name_obj);
3656 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003657 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003658 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003659 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003660 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003661 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 EXPAT(SetNamespaceDeclHandler)(
3663 self->parser,
3664 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3665 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3666 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003667 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003668 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669 EXPAT(SetNamespaceDeclHandler)(
3670 self->parser,
3671 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3672 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3673 );
3674 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003675 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003676 Py_DECREF(events_seq);
3677 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678 return NULL;
3679 }
3680 }
3681
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003682 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684}
3685
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003686static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003687xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003688{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003689 if (PyUnicode_Check(nameobj)) {
3690 PyObject* res;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003691 if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003692 res = self->entity;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003693 else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
Alexander Belopolskye239d232010-12-08 23:31:48 +00003694 res = self->target;
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02003695 else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00003696 return PyUnicode_FromFormat(
3697 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003698 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003699 }
3700 else
3701 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003702
Alexander Belopolskye239d232010-12-08 23:31:48 +00003703 Py_INCREF(res);
3704 return res;
3705 }
3706 generic:
3707 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003708}
3709
Serhiy Storchakacb985562015-05-04 15:32:48 +03003710#include "clinic/_elementtree.c.h"
3711
3712static PyMethodDef element_methods[] = {
3713
3714 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3715
3716 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3717 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3718
3719 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3720 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3721 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3722
3723 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3724 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3725 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3726 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3727
3728 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3729 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3730 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3731
3732 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3733 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3734
3735 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3736 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3737
3738 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3739
3740 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3741 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3742 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3743 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3744 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3745
3746 {NULL, NULL}
3747};
3748
3749static PyMappingMethods element_as_mapping = {
3750 (lenfunc) element_length,
3751 (binaryfunc) element_subscr,
3752 (objobjargproc) element_ass_subscr,
3753};
3754
3755static PyTypeObject Element_Type = {
3756 PyVarObject_HEAD_INIT(NULL, 0)
3757 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3758 /* methods */
3759 (destructor)element_dealloc, /* tp_dealloc */
3760 0, /* tp_print */
3761 0, /* tp_getattr */
3762 0, /* tp_setattr */
3763 0, /* tp_reserved */
3764 (reprfunc)element_repr, /* tp_repr */
3765 0, /* tp_as_number */
3766 &element_as_sequence, /* tp_as_sequence */
3767 &element_as_mapping, /* tp_as_mapping */
3768 0, /* tp_hash */
3769 0, /* tp_call */
3770 0, /* tp_str */
3771 (getattrofunc)element_getattro, /* tp_getattro */
3772 (setattrofunc)element_setattro, /* tp_setattro */
3773 0, /* tp_as_buffer */
3774 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3775 /* tp_flags */
3776 0, /* tp_doc */
3777 (traverseproc)element_gc_traverse, /* tp_traverse */
3778 (inquiry)element_gc_clear, /* tp_clear */
3779 0, /* tp_richcompare */
3780 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3781 0, /* tp_iter */
3782 0, /* tp_iternext */
3783 element_methods, /* tp_methods */
3784 0, /* tp_members */
3785 0, /* tp_getset */
3786 0, /* tp_base */
3787 0, /* tp_dict */
3788 0, /* tp_descr_get */
3789 0, /* tp_descr_set */
3790 0, /* tp_dictoffset */
3791 (initproc)element_init, /* tp_init */
3792 PyType_GenericAlloc, /* tp_alloc */
3793 element_new, /* tp_new */
3794 0, /* tp_free */
3795};
3796
3797static PyMethodDef treebuilder_methods[] = {
3798 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3799 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3800 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3801 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3802 {NULL, NULL}
3803};
3804
3805static PyTypeObject TreeBuilder_Type = {
3806 PyVarObject_HEAD_INIT(NULL, 0)
3807 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3808 /* methods */
3809 (destructor)treebuilder_dealloc, /* tp_dealloc */
3810 0, /* tp_print */
3811 0, /* tp_getattr */
3812 0, /* tp_setattr */
3813 0, /* tp_reserved */
3814 0, /* tp_repr */
3815 0, /* tp_as_number */
3816 0, /* tp_as_sequence */
3817 0, /* tp_as_mapping */
3818 0, /* tp_hash */
3819 0, /* tp_call */
3820 0, /* tp_str */
3821 0, /* tp_getattro */
3822 0, /* tp_setattro */
3823 0, /* tp_as_buffer */
3824 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3825 /* tp_flags */
3826 0, /* tp_doc */
3827 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3828 (inquiry)treebuilder_gc_clear, /* tp_clear */
3829 0, /* tp_richcompare */
3830 0, /* tp_weaklistoffset */
3831 0, /* tp_iter */
3832 0, /* tp_iternext */
3833 treebuilder_methods, /* tp_methods */
3834 0, /* tp_members */
3835 0, /* tp_getset */
3836 0, /* tp_base */
3837 0, /* tp_dict */
3838 0, /* tp_descr_get */
3839 0, /* tp_descr_set */
3840 0, /* tp_dictoffset */
3841 _elementtree_TreeBuilder___init__, /* tp_init */
3842 PyType_GenericAlloc, /* tp_alloc */
3843 treebuilder_new, /* tp_new */
3844 0, /* tp_free */
3845};
3846
3847static PyMethodDef xmlparser_methods[] = {
3848 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3849 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3850 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3851 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3852 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3853 {NULL, NULL}
3854};
3855
Neal Norwitz227b5332006-03-22 09:28:35 +00003856static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003857 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003858 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003859 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003860 (destructor)xmlparser_dealloc, /* tp_dealloc */
3861 0, /* tp_print */
3862 0, /* tp_getattr */
3863 0, /* tp_setattr */
3864 0, /* tp_reserved */
3865 0, /* tp_repr */
3866 0, /* tp_as_number */
3867 0, /* tp_as_sequence */
3868 0, /* tp_as_mapping */
3869 0, /* tp_hash */
3870 0, /* tp_call */
3871 0, /* tp_str */
3872 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3873 0, /* tp_setattro */
3874 0, /* tp_as_buffer */
3875 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3876 /* tp_flags */
3877 0, /* tp_doc */
3878 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3879 (inquiry)xmlparser_gc_clear, /* tp_clear */
3880 0, /* tp_richcompare */
3881 0, /* tp_weaklistoffset */
3882 0, /* tp_iter */
3883 0, /* tp_iternext */
3884 xmlparser_methods, /* tp_methods */
3885 0, /* tp_members */
3886 0, /* tp_getset */
3887 0, /* tp_base */
3888 0, /* tp_dict */
3889 0, /* tp_descr_get */
3890 0, /* tp_descr_set */
3891 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003892 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003893 PyType_GenericAlloc, /* tp_alloc */
3894 xmlparser_new, /* tp_new */
3895 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003896};
3897
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003898/* ==================================================================== */
3899/* python module interface */
3900
3901static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003902 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003903 {NULL, NULL}
3904};
3905
Martin v. Löwis1a214512008-06-11 05:26:20 +00003906
Eli Bendersky532d03e2013-08-10 08:00:39 -07003907static struct PyModuleDef elementtreemodule = {
3908 PyModuleDef_HEAD_INIT,
3909 "_elementtree",
3910 NULL,
3911 sizeof(elementtreestate),
3912 _functions,
3913 NULL,
3914 elementtree_traverse,
3915 elementtree_clear,
3916 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003917};
3918
Neal Norwitzf6657e62006-12-28 04:47:50 +00003919PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003920PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003921{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003922 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003923 elementtreestate *st;
3924
3925 m = PyState_FindModule(&elementtreemodule);
3926 if (m) {
3927 Py_INCREF(m);
3928 return m;
3929 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003930
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003931 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003932 if (PyType_Ready(&ElementIter_Type) < 0)
3933 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003934 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003935 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003936 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003937 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003938 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003939 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003940
Eli Bendersky532d03e2013-08-10 08:00:39 -07003941 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003942 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003943 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003944 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003945
Eli Bendersky828efde2012-04-05 05:40:58 +03003946 if (!(temp = PyImport_ImportModule("copy")))
3947 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003948 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003949 Py_XDECREF(temp);
3950
Eli Bendersky532d03e2013-08-10 08:00:39 -07003951 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003952 return NULL;
3953
Eli Bendersky20d41742012-06-01 09:48:37 +03003954 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003955 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3956 if (expat_capi) {
3957 /* check that it's usable */
3958 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003959 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003960 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3961 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003962 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003963 PyErr_SetString(PyExc_ImportError,
3964 "pyexpat version is incompatible");
3965 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003966 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003967 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003968 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003969 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003970
Eli Bendersky532d03e2013-08-10 08:00:39 -07003971 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003972 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003973 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003974 Py_INCREF(st->parseerror_obj);
3975 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003976
Eli Bendersky092af1f2012-03-04 07:14:03 +02003977 Py_INCREF((PyObject *)&Element_Type);
3978 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3979
Eli Bendersky58d548d2012-05-29 15:45:16 +03003980 Py_INCREF((PyObject *)&TreeBuilder_Type);
3981 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3982
Eli Bendersky52467b12012-06-01 07:13:08 +03003983 Py_INCREF((PyObject *)&XMLParser_Type);
3984 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003985
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003986 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003987}