blob: 3cf3d59f12021b332f8ec5d0e363c325c859157f [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra block;
   an element with at most this many children needs no separately
   allocated child array. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  Change the condition to 1 to keep a running
   byte count and print it on every ALLOC/RELEASE; by default both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local helper returning
   `type`; MSVC builds additionally request inlining and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The text and tail slots of an element are tagged pointers: bit 0 of the
   stored value is a 'join' flag and the remaining bits are the object
   pointer.  Every use of text or tail as an object pointer must therefore
   go through JOIN_OBJ.  See the comments in the ElementObject definition
   for what the flag means. */

/* JOIN_OBJ: the object pointer with the flag bit masked off. */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
/* JOIN_GET: the value of the flag bit (0 or 1). */
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
/* JOIN_SET: the pointer with its flag bit cleared and then OR-ed with
   `flag`. */
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
/* Declared up front so that code in this part of the file can take their
   addresses (e.g. &Element_Type in create_new_element and
   Element_CheckExact).  NOTE(review): the initialised definitions are
   presumably further down the file -- not visible in this section. */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
/* Object references kept per module instance.  elementtree_clear() releases
   all three and elementtree_traverse() reports them to the GC. */
typedef struct {
    /* NOTE(review): presumably the ParseError exception class; it is
       assigned outside this section -- confirm. */
    PyObject *parseerror_obj;
    /* callable invoked by the deepcopy() helper as callable(object, memo) */
    PyObject *deepcopy_obj;
    /* NOTE(review): presumably the ElementPath helper module; it is
       assigned outside this section -- confirm. */
    PyObject *elementpath_obj;
} elementtreestate;
89
/* Forward declaration: ET_STATE_GLOBAL below needs the address of the
   module definition. */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 * NOTE(review): the result of PyState_FindModule() is handed straight to
 * PyModule_GetState() without a NULL check.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
/* Non-zero when the type of `op` is exactly Element_Type (a subclass does
   not match). */
#define Element_CheckExact(op) (&Element_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700641 _clear_joined_ptr(&self->text);
642 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643
644 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300645 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 return 0;
649}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655
656 if (self->weakreflist != NULL)
657 PyObject_ClearWeakRefs((PyObject *) self);
658
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 /* element_gc_clear clears all references and deallocates extra
660 */
661 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
663 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200664 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665}
666
667/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669/*[clinic input]
670_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
Serhiy Storchakacb985562015-05-04 15:32:48 +0300672 subelement: object(subclass_of='&Element_Type')
673 /
674
675[clinic start generated code]*/
676
677static PyObject *
678_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
679/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
680{
681 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 return NULL;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element_clear_impl(ElementObject *self)
694/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
695{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300696 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_INCREF(Py_None);
699 Py_DECREF(JOIN_OBJ(self->text));
700 self->text = Py_None;
701
702 Py_INCREF(Py_None);
703 Py_DECREF(JOIN_OBJ(self->tail));
704 self->tail = Py_None;
705
706 Py_RETURN_NONE;
707}
708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709/*[clinic input]
710_elementtree.Element.__copy__
711
712[clinic start generated code]*/
713
714static PyObject *
715_elementtree_Element___copy___impl(ElementObject *self)
716/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200718 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 ElementObject* element;
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800722 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (!element)
724 return NULL;
725
726 Py_DECREF(JOIN_OBJ(element->text));
727 element->text = self->text;
728 Py_INCREF(JOIN_OBJ(element->text));
729
730 Py_DECREF(JOIN_OBJ(element->tail));
731 element->tail = self->tail;
732 Py_INCREF(JOIN_OBJ(element->tail));
733
734 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000735 if (element_resize(element, self->extra->length) < 0) {
736 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000738 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
740 for (i = 0; i < self->extra->length; i++) {
741 Py_INCREF(self->extra->children[i]);
742 element->extra->children[i] = self->extra->children[i];
743 }
744
745 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 }
747
748 return (PyObject*) element;
749}
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
754 memo: object
755 /
756
757[clinic start generated code]*/
758
759static PyObject *
760_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 Py_DECREF(element->text);
798 element->text = JOIN_SET(text, JOIN_GET(self->text));
799
800 tail = deepcopy(JOIN_OBJ(self->tail), memo);
801 if (!tail)
802 goto error;
803 Py_DECREF(element->tail);
804 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
805
806 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 if (element_resize(element, self->extra->length) < 0)
808 goto error;
809
810 for (i = 0; i < self->extra->length; i++) {
811 PyObject* child = deepcopy(self->extra->children[i], memo);
812 if (!child) {
813 element->extra->length = i;
814 goto error;
815 }
816 element->extra->children[i] = child;
817 }
818
819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200823 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchakacb985562015-05-04 15:32:48 +0300841/*[clinic input]
842_elementtree.Element.__sizeof__ -> Py_ssize_t
843
844[clinic start generated code]*/
845
846static Py_ssize_t
847_elementtree_Element___sizeof___impl(ElementObject *self)
848/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200849{
Martin v. Löwisbce16662012-06-17 10:41:22 +0200850 Py_ssize_t result = sizeof(ElementObject);
851 if (self->extra) {
852 result += sizeof(ElementObjectExtra);
853 if (self->extra->children != self->extra->_children)
854 result += sizeof(PyObject*) * self->extra->allocated;
855 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300856 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200857}
858
Eli Bendersky698bdb22013-01-10 06:01:06 -0800859/* dict keys for getstate/setstate. */
860#define PICKLED_TAG "tag"
861#define PICKLED_CHILDREN "_children"
862#define PICKLED_ATTRIB "attrib"
863#define PICKLED_TAIL "tail"
864#define PICKLED_TEXT "text"
865
866/* __getstate__ returns a fabricated instance dict as in the pure-Python
867 * Element implementation, for interoperability/interchangeability. This
868 * makes the pure-Python implementation details an API, but (a) there aren't
869 * any unnecessary structures there; and (b) it buys compatibility with 3.2
870 * pickles. See issue #16076.
871 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300872/*[clinic input]
873_elementtree.Element.__getstate__
874
875[clinic start generated code]*/
876
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300878_elementtree_Element___getstate___impl(ElementObject *self)
879/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200881 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 PyObject *instancedict = NULL, *children;
883
884 /* Build a list of children. */
885 children = PyList_New(self->extra ? self->extra->length : 0);
886 if (!children)
887 return NULL;
888 for (i = 0; i < PyList_GET_SIZE(children); i++) {
889 PyObject *child = self->extra->children[i];
890 Py_INCREF(child);
891 PyList_SET_ITEM(children, i, child);
892 }
893
894 /* Construct the state object. */
895 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
896 if (noattrib)
897 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
898 PICKLED_TAG, self->tag,
899 PICKLED_CHILDREN, children,
900 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 PICKLED_TEXT, JOIN_OBJ(self->text),
902 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903 else
904 instancedict = Py_BuildValue("{sOsOsOsOsO}",
905 PICKLED_TAG, self->tag,
906 PICKLED_CHILDREN, children,
907 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 PICKLED_TEXT, JOIN_OBJ(self->text),
909 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800910 if (instancedict) {
911 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800913 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 else {
915 for (i = 0; i < PyList_GET_SIZE(children); i++)
916 Py_DECREF(PyList_GET_ITEM(children, i));
917 Py_DECREF(children);
918
919 return NULL;
920 }
921}
922
923static PyObject *
924element_setstate_from_attributes(ElementObject *self,
925 PyObject *tag,
926 PyObject *attrib,
927 PyObject *text,
928 PyObject *tail,
929 PyObject *children)
930{
931 Py_ssize_t i, nchildren;
932
933 if (!tag) {
934 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
935 return NULL;
936 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937
938 Py_CLEAR(self->tag);
939 self->tag = tag;
940 Py_INCREF(self->tag);
941
Eli Benderskydd3661e2013-09-13 06:24:25 -0700942 _clear_joined_ptr(&self->text);
943 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
944 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945
Eli Benderskydd3661e2013-09-13 06:24:25 -0700946 _clear_joined_ptr(&self->tail);
947 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
948 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949
950 /* Handle ATTRIB and CHILDREN. */
951 if (!children && !attrib)
952 Py_RETURN_NONE;
953
954 /* Compute 'nchildren'. */
955 if (children) {
956 if (!PyList_Check(children)) {
957 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
958 return NULL;
959 }
960 nchildren = PyList_Size(children);
961 }
962 else {
963 nchildren = 0;
964 }
965
966 /* Allocate 'extra'. */
967 if (element_resize(self, nchildren)) {
968 return NULL;
969 }
970 assert(self->extra && self->extra->allocated >= nchildren);
971
972 /* Copy children */
973 for (i = 0; i < nchildren; i++) {
974 self->extra->children[i] = PyList_GET_ITEM(children, i);
975 Py_INCREF(self->extra->children[i]);
976 }
977
978 self->extra->length = nchildren;
979 self->extra->allocated = nchildren;
980
981 /* Stash attrib. */
982 if (attrib) {
983 Py_CLEAR(self->extra->attrib);
984 self->extra->attrib = attrib;
985 Py_INCREF(attrib);
986 }
987
988 Py_RETURN_NONE;
989}
990
991/* __setstate__ for Element instance from the Python implementation.
992 * 'state' should be the instance dict.
993 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300994
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995static PyObject *
996element_setstate_from_Python(ElementObject *self, PyObject *state)
997{
998 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
999 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1000 PyObject *args;
1001 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001002 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004 tag = attrib = text = tail = children = NULL;
1005 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001008
1009 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1010 &attrib, &text, &tail, &children))
1011 retval = element_setstate_from_attributes(self, tag, attrib, text,
1012 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001014 retval = NULL;
1015
1016 Py_DECREF(args);
1017 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001018}
1019
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020/*[clinic input]
1021_elementtree.Element.__setstate__
1022
1023 state: object
1024 /
1025
1026[clinic start generated code]*/
1027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001029_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1030/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031{
1032 if (!PyDict_CheckExact(state)) {
1033 PyErr_Format(PyExc_TypeError,
1034 "Don't know how to unpickle \"%.200R\" as an Element",
1035 state);
1036 return NULL;
1037 }
1038 else
1039 return element_setstate_from_Python(self, state);
1040}
1041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001042LOCAL(int)
1043checkpath(PyObject* tag)
1044{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001045 Py_ssize_t i;
1046 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047
1048 /* check if a tag contains an xpath character */
1049
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050#define PATHCHAR(ch) \
1051 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1055 void *data = PyUnicode_DATA(tag);
1056 unsigned int kind = PyUnicode_KIND(tag);
1057 for (i = 0; i < len; i++) {
1058 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1059 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 return 1;
1065 }
1066 return 0;
1067 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001068 if (PyBytes_Check(tag)) {
1069 char *p = PyBytes_AS_STRING(tag);
1070 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071 if (p[i] == '{')
1072 check = 0;
1073 else if (p[i] == '}')
1074 check = 1;
1075 else if (check && PATHCHAR(p[i]))
1076 return 1;
1077 }
1078 return 0;
1079 }
1080
1081 return 1; /* unknown type; might be path expression */
1082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.extend
1086
1087 elements: object
1088 /
1089
1090[clinic start generated code]*/
1091
1092static PyObject *
1093_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1094/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095{
1096 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001097 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098
Serhiy Storchakacb985562015-05-04 15:32:48 +03001099 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001100 if (!seq) {
1101 PyErr_Format(
1102 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001104 );
1105 return NULL;
1106 }
1107
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001108 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001109 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001110 Py_INCREF(element);
1111 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001112 PyErr_Format(
1113 PyExc_TypeError,
1114 "expected an Element, not \"%.200s\"",
1115 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001116 Py_DECREF(seq);
1117 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001118 return NULL;
1119 }
1120
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (element_add_subelement(self, element) < 0) {
1122 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 return NULL;
1125 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001126 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 }
1128
1129 Py_DECREF(seq);
1130
1131 Py_RETURN_NONE;
1132}
1133
Serhiy Storchakacb985562015-05-04 15:32:48 +03001134/*[clinic input]
1135_elementtree.Element.find
1136
1137 path: object
1138 namespaces: object = None
1139
1140[clinic start generated code]*/
1141
1142static PyObject *
1143_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1144 PyObject *namespaces)
1145/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001147 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001148 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001149
Serhiy Storchakacb985562015-05-04 15:32:48 +03001150 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001151 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001153 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001155 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156
1157 if (!self->extra)
1158 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 for (i = 0; i < self->extra->length; i++) {
1161 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001162 int rc;
1163 if (!Element_CheckExact(item))
1164 continue;
1165 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001166 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 Py_DECREF(item);
1170 if (rc < 0)
1171 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 }
1173
1174 Py_RETURN_NONE;
1175}
1176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177/*[clinic input]
1178_elementtree.Element.findtext
1179
1180 path: object
1181 default: object = None
1182 namespaces: object = None
1183
1184[clinic start generated code]*/
1185
1186static PyObject *
1187_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1188 PyObject *default_value,
1189 PyObject *namespaces)
1190/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001192 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001193 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001194 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001197 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199 );
1200
1201 if (!self->extra) {
1202 Py_INCREF(default_value);
1203 return default_value;
1204 }
1205
1206 for (i = 0; i < self->extra->length; i++) {
1207 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001208 int rc;
1209 if (!Element_CheckExact(item))
1210 continue;
1211 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001212 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001213 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 if (text == Py_None) {
1216 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001217 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001219 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001220 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 return text;
1222 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 Py_DECREF(item);
1224 if (rc < 0)
1225 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 }
1227
1228 Py_INCREF(default_value);
1229 return default_value;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.findall
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001247 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001248 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001249
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001250 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001251 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001252 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001253 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001255 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256
1257 out = PyList_New(0);
1258 if (!out)
1259 return NULL;
1260
1261 if (!self->extra)
1262 return out;
1263
1264 for (i = 0; i < self->extra->length; i++) {
1265 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 int rc;
1267 if (!Element_CheckExact(item))
1268 continue;
1269 Py_INCREF(item);
1270 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1271 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1272 Py_DECREF(item);
1273 Py_DECREF(out);
1274 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001276 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 }
1278
1279 return out;
1280}
1281
Serhiy Storchakacb985562015-05-04 15:32:48 +03001282/*[clinic input]
1283_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001284
Serhiy Storchakacb985562015-05-04 15:32:48 +03001285 path: object
1286 namespaces: object = None
1287
1288[clinic start generated code]*/
1289
1290static PyObject *
1291_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1292 PyObject *namespaces)
1293/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1294{
1295 PyObject* tag = path;
1296 _Py_IDENTIFIER(iterfind);
1297 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001298
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001299 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001300 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001301}
1302
Serhiy Storchakacb985562015-05-04 15:32:48 +03001303/*[clinic input]
1304_elementtree.Element.get
1305
1306 key: object
1307 default: object = None
1308
1309[clinic start generated code]*/
1310
1311static PyObject *
1312_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1313 PyObject *default_value)
1314/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315{
1316 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317
1318 if (!self->extra || self->extra->attrib == Py_None)
1319 value = default_value;
1320 else {
1321 value = PyDict_GetItem(self->extra->attrib, key);
1322 if (!value)
1323 value = default_value;
1324 }
1325
1326 Py_INCREF(value);
1327 return value;
1328}
1329
Serhiy Storchakacb985562015-05-04 15:32:48 +03001330/*[clinic input]
1331_elementtree.Element.getchildren
1332
1333[clinic start generated code]*/
1334
1335static PyObject *
1336_elementtree_Element_getchildren_impl(ElementObject *self)
1337/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001339 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340 PyObject* list;
1341
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001342 /* FIXME: report as deprecated? */
1343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344 if (!self->extra)
1345 return PyList_New(0);
1346
1347 list = PyList_New(self->extra->length);
1348 if (!list)
1349 return NULL;
1350
1351 for (i = 0; i < self->extra->length; i++) {
1352 PyObject* item = self->extra->children[i];
1353 Py_INCREF(item);
1354 PyList_SET_ITEM(list, i, item);
1355 }
1356
1357 return list;
1358}
1359
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001360
Eli Bendersky64d11e62012-06-15 07:42:50 +03001361static PyObject *
1362create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1363
1364
Serhiy Storchakacb985562015-05-04 15:32:48 +03001365/*[clinic input]
1366_elementtree.Element.iter
1367
1368 tag: object = None
1369
1370[clinic start generated code]*/
1371
Eli Bendersky64d11e62012-06-15 07:42:50 +03001372static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001373_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1374/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001375{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001376 if (PyUnicode_Check(tag)) {
1377 if (PyUnicode_READY(tag) < 0)
1378 return NULL;
1379 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1380 tag = Py_None;
1381 }
1382 else if (PyBytes_Check(tag)) {
1383 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1384 tag = Py_None;
1385 }
1386
Eli Bendersky64d11e62012-06-15 07:42:50 +03001387 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001388}
1389
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Serhiy Storchakacb985562015-05-04 15:32:48 +03001394[clinic start generated code]*/
1395
1396static PyObject *
1397_elementtree_Element_itertext_impl(ElementObject *self)
1398/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1399{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001400 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401}
1402
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001405element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001406{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001407 ElementObject* self = (ElementObject*) self_;
1408
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409 if (!self->extra || index < 0 || index >= self->extra->length) {
1410 PyErr_SetString(
1411 PyExc_IndexError,
1412 "child index out of range"
1413 );
1414 return NULL;
1415 }
1416
1417 Py_INCREF(self->extra->children[index]);
1418 return self->extra->children[index];
1419}
1420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421/*[clinic input]
1422_elementtree.Element.insert
1423
1424 index: Py_ssize_t
1425 subelement: object(subclass_of='&Element_Type')
1426 /
1427
1428[clinic start generated code]*/
1429
1430static PyObject *
1431_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1432 PyObject *subelement)
1433/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001435 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001436
Victor Stinner5f0af232013-07-11 23:01:36 +02001437 if (!self->extra) {
1438 if (create_extra(self, NULL) < 0)
1439 return NULL;
1440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001442 if (index < 0) {
1443 index += self->extra->length;
1444 if (index < 0)
1445 index = 0;
1446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001447 if (index > self->extra->length)
1448 index = self->extra->length;
1449
1450 if (element_resize(self, 1) < 0)
1451 return NULL;
1452
1453 for (i = self->extra->length; i > index; i--)
1454 self->extra->children[i] = self->extra->children[i-1];
1455
Serhiy Storchakacb985562015-05-04 15:32:48 +03001456 Py_INCREF(subelement);
1457 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458
1459 self->extra->length++;
1460
1461 Py_RETURN_NONE;
1462}
1463
Serhiy Storchakacb985562015-05-04 15:32:48 +03001464/*[clinic input]
1465_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466
Serhiy Storchakacb985562015-05-04 15:32:48 +03001467[clinic start generated code]*/
1468
1469static PyObject *
1470_elementtree_Element_items_impl(ElementObject *self)
1471/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1472{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001473 if (!self->extra || self->extra->attrib == Py_None)
1474 return PyList_New(0);
1475
1476 return PyDict_Items(self->extra->attrib);
1477}
1478
Serhiy Storchakacb985562015-05-04 15:32:48 +03001479/*[clinic input]
1480_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482[clinic start generated code]*/
1483
1484static PyObject *
1485_elementtree_Element_keys_impl(ElementObject *self)
1486/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1487{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488 if (!self->extra || self->extra->attrib == Py_None)
1489 return PyList_New(0);
1490
1491 return PyDict_Keys(self->extra->attrib);
1492}
1493
Martin v. Löwis18e16552006-02-15 17:27:45 +00001494static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495element_length(ElementObject* self)
1496{
1497 if (!self->extra)
1498 return 0;
1499
1500 return self->extra->length;
1501}
1502
Serhiy Storchakacb985562015-05-04 15:32:48 +03001503/*[clinic input]
1504_elementtree.Element.makeelement
1505
1506 tag: object
1507 attrib: object
1508 /
1509
1510[clinic start generated code]*/
1511
1512static PyObject *
1513_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1514 PyObject *attrib)
1515/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516{
1517 PyObject* elem;
1518
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519 attrib = PyDict_Copy(attrib);
1520 if (!attrib)
1521 return NULL;
1522
Eli Bendersky092af1f2012-03-04 07:14:03 +02001523 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524
1525 Py_DECREF(attrib);
1526
1527 return elem;
1528}
1529
Serhiy Storchakacb985562015-05-04 15:32:48 +03001530/*[clinic input]
1531_elementtree.Element.remove
1532
1533 subelement: object(subclass_of='&Element_Type')
1534 /
1535
1536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1540/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001542 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001543 int rc;
1544 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546 if (!self->extra) {
1547 /* element has no children, so raise exception */
1548 PyErr_SetString(
1549 PyExc_ValueError,
1550 "list.remove(x): x not in list"
1551 );
1552 return NULL;
1553 }
1554
1555 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001558 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001559 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001560 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001561 if (rc < 0)
1562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 }
1564
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001565 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001566 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 PyErr_SetString(
1568 PyExc_ValueError,
1569 "list.remove(x): x not in list"
1570 );
1571 return NULL;
1572 }
1573
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001574 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575
1576 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577 for (; i < self->extra->length; i++)
1578 self->extra->children[i] = self->extra->children[i+1];
1579
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001580 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001581 Py_RETURN_NONE;
1582}
1583
1584static PyObject*
1585element_repr(ElementObject* self)
1586{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001587 if (self->tag)
1588 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1589 else
1590 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591}
1592
Serhiy Storchakacb985562015-05-04 15:32:48 +03001593/*[clinic input]
1594_elementtree.Element.set
1595
1596 key: object
1597 value: object
1598 /
1599
1600[clinic start generated code]*/
1601
1602static PyObject *
1603_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1604 PyObject *value)
1605/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606{
1607 PyObject* attrib;
1608
Victor Stinner5f0af232013-07-11 23:01:36 +02001609 if (!self->extra) {
1610 if (create_extra(self, NULL) < 0)
1611 return NULL;
1612 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613
1614 attrib = element_get_attrib(self);
1615 if (!attrib)
1616 return NULL;
1617
1618 if (PyDict_SetItem(attrib, key, value) < 0)
1619 return NULL;
1620
1621 Py_RETURN_NONE;
1622}
1623
1624static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001625element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001627 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001628 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629 PyObject* old;
1630
1631 if (!self->extra || index < 0 || index >= self->extra->length) {
1632 PyErr_SetString(
1633 PyExc_IndexError,
1634 "child assignment index out of range");
1635 return -1;
1636 }
1637
1638 old = self->extra->children[index];
1639
1640 if (item) {
1641 Py_INCREF(item);
1642 self->extra->children[index] = item;
1643 } else {
1644 self->extra->length--;
1645 for (i = index; i < self->extra->length; i++)
1646 self->extra->children[i] = self->extra->children[i+1];
1647 }
1648
1649 Py_DECREF(old);
1650
1651 return 0;
1652}
1653
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001654static PyObject*
1655element_subscr(PyObject* self_, PyObject* item)
1656{
1657 ElementObject* self = (ElementObject*) self_;
1658
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001659 if (PyIndex_Check(item)) {
1660 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001661
1662 if (i == -1 && PyErr_Occurred()) {
1663 return NULL;
1664 }
1665 if (i < 0 && self->extra)
1666 i += self->extra->length;
1667 return element_getitem(self_, i);
1668 }
1669 else if (PySlice_Check(item)) {
1670 Py_ssize_t start, stop, step, slicelen, cur, i;
1671 PyObject* list;
1672
1673 if (!self->extra)
1674 return PyList_New(0);
1675
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001676 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001677 self->extra->length,
1678 &start, &stop, &step, &slicelen) < 0) {
1679 return NULL;
1680 }
1681
1682 if (slicelen <= 0)
1683 return PyList_New(0);
1684 else {
1685 list = PyList_New(slicelen);
1686 if (!list)
1687 return NULL;
1688
1689 for (cur = start, i = 0; i < slicelen;
1690 cur += step, i++) {
1691 PyObject* item = self->extra->children[cur];
1692 Py_INCREF(item);
1693 PyList_SET_ITEM(list, i, item);
1694 }
1695
1696 return list;
1697 }
1698 }
1699 else {
1700 PyErr_SetString(PyExc_TypeError,
1701 "element indices must be integers");
1702 return NULL;
1703 }
1704}
1705
1706static int
1707element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1708{
1709 ElementObject* self = (ElementObject*) self_;
1710
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001711 if (PyIndex_Check(item)) {
1712 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001713
1714 if (i == -1 && PyErr_Occurred()) {
1715 return -1;
1716 }
1717 if (i < 0 && self->extra)
1718 i += self->extra->length;
1719 return element_setitem(self_, i, value);
1720 }
1721 else if (PySlice_Check(item)) {
1722 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1723
1724 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001725 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726
Victor Stinner5f0af232013-07-11 23:01:36 +02001727 if (!self->extra) {
1728 if (create_extra(self, NULL) < 0)
1729 return -1;
1730 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001731
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001732 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001733 self->extra->length,
1734 &start, &stop, &step, &slicelen) < 0) {
1735 return -1;
1736 }
1737
Eli Bendersky865756a2012-03-09 13:38:15 +02001738 if (value == NULL) {
1739 /* Delete slice */
1740 size_t cur;
1741 Py_ssize_t i;
1742
1743 if (slicelen <= 0)
1744 return 0;
1745
1746 /* Since we're deleting, the direction of the range doesn't matter,
1747 * so for simplicity make it always ascending.
1748 */
1749 if (step < 0) {
1750 stop = start + 1;
1751 start = stop + step * (slicelen - 1) - 1;
1752 step = -step;
1753 }
1754
1755 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1756
1757 /* recycle is a list that will contain all the children
1758 * scheduled for removal.
1759 */
1760 if (!(recycle = PyList_New(slicelen))) {
1761 PyErr_NoMemory();
1762 return -1;
1763 }
1764
1765 /* This loop walks over all the children that have to be deleted,
1766 * with cur pointing at them. num_moved is the amount of children
1767 * until the next deleted child that have to be "shifted down" to
1768 * occupy the deleted's places.
1769 * Note that in the ith iteration, shifting is done i+i places down
1770 * because i children were already removed.
1771 */
1772 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1773 /* Compute how many children have to be moved, clipping at the
1774 * list end.
1775 */
1776 Py_ssize_t num_moved = step - 1;
1777 if (cur + step >= (size_t)self->extra->length) {
1778 num_moved = self->extra->length - cur - 1;
1779 }
1780
1781 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1782
1783 memmove(
1784 self->extra->children + cur - i,
1785 self->extra->children + cur + 1,
1786 num_moved * sizeof(PyObject *));
1787 }
1788
1789 /* Leftover "tail" after the last removed child */
1790 cur = start + (size_t)slicelen * step;
1791 if (cur < (size_t)self->extra->length) {
1792 memmove(
1793 self->extra->children + cur - slicelen,
1794 self->extra->children + cur,
1795 (self->extra->length - cur) * sizeof(PyObject *));
1796 }
1797
1798 self->extra->length -= slicelen;
1799
1800 /* Discard the recycle list with all the deleted sub-elements */
1801 Py_XDECREF(recycle);
1802 return 0;
1803 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001804
1805 /* A new slice is actually being assigned */
1806 seq = PySequence_Fast(value, "");
1807 if (!seq) {
1808 PyErr_Format(
1809 PyExc_TypeError,
1810 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1811 );
1812 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001813 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001814 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001815
1816 if (step != 1 && newlen != slicelen)
1817 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001818 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001819 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 "attempt to assign sequence of size %zd "
1821 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001822 newlen, slicelen
1823 );
1824 return -1;
1825 }
1826
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001827 /* Resize before creating the recycle bin, to prevent refleaks. */
1828 if (newlen > slicelen) {
1829 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001830 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001831 return -1;
1832 }
1833 }
1834
1835 if (slicelen > 0) {
1836 /* to avoid recursive calls to this method (via decref), move
1837 old items to the recycle bin here, and get rid of them when
1838 we're done modifying the element */
1839 recycle = PyList_New(slicelen);
1840 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001841 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001842 return -1;
1843 }
1844 for (cur = start, i = 0; i < slicelen;
1845 cur += step, i++)
1846 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1847 }
1848
1849 if (newlen < slicelen) {
1850 /* delete slice */
1851 for (i = stop; i < self->extra->length; i++)
1852 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1853 } else if (newlen > slicelen) {
1854 /* insert slice */
1855 for (i = self->extra->length-1; i >= stop; i--)
1856 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1857 }
1858
1859 /* replace the slice */
1860 for (cur = start, i = 0; i < newlen;
1861 cur += step, i++) {
1862 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1863 Py_INCREF(element);
1864 self->extra->children[cur] = element;
1865 }
1866
1867 self->extra->length += newlen - slicelen;
1868
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870
1871 /* discard the recycle bin, and everything in it */
1872 Py_XDECREF(recycle);
1873
1874 return 0;
1875 }
1876 else {
1877 PyErr_SetString(PyExc_TypeError,
1878 "element indices must be integers");
1879 return -1;
1880 }
1881}
1882
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001884element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001886 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001888 return res;
1889}
1890
Serhiy Storchakadde08152015-11-25 15:28:13 +02001891static PyObject*
1892element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001893{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001894 PyObject *res = element_get_text(self);
1895 Py_XINCREF(res);
1896 return res;
1897}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001898
Serhiy Storchakadde08152015-11-25 15:28:13 +02001899static PyObject*
1900element_tail_getter(ElementObject *self, void *closure)
1901{
1902 PyObject *res = element_get_tail(self);
1903 Py_XINCREF(res);
1904 return res;
1905}
1906
1907static PyObject*
1908element_attrib_getter(ElementObject *self, void *closure)
1909{
1910 PyObject *res;
1911 if (!self->extra) {
1912 if (create_extra(self, NULL) < 0)
1913 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001914 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001915 res = element_get_attrib(self);
1916 Py_XINCREF(res);
1917 return res;
1918}
Victor Stinner4d463432013-07-11 23:05:03 +02001919
/* macro for setter validation: a NULL value means "delete the attribute",
   which is not supported, so raise AttributeError and make the enclosing
   setter return -1.  Wrapped in do { } while (0) so that the expansion is
   a single statement and is safe inside an unbraced if/else. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1928
Serhiy Storchakadde08152015-11-25 15:28:13 +02001929static int
1930element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1931{
1932 _VALIDATE_ATTR_VALUE(value);
1933 Py_INCREF(value);
1934 Py_DECREF(self->tag);
1935 self->tag = value;
1936 return 0;
1937}
1938
1939static int
1940element_text_setter(ElementObject *self, PyObject *value, void *closure)
1941{
1942 _VALIDATE_ATTR_VALUE(value);
1943 Py_INCREF(value);
1944 Py_DECREF(JOIN_OBJ(self->text));
1945 self->text = value;
1946 return 0;
1947}
1948
1949static int
1950element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1951{
1952 _VALIDATE_ATTR_VALUE(value);
1953 Py_INCREF(value);
1954 Py_DECREF(JOIN_OBJ(self->tail));
1955 self->tail = value;
1956 return 0;
1957}
1958
1959static int
1960element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1961{
1962 _VALIDATE_ATTR_VALUE(value);
1963 if (!self->extra) {
1964 if (create_extra(self, NULL) < 0)
1965 return -1;
1966 }
1967 Py_INCREF(value);
1968 Py_DECREF(self->extra->attrib);
1969 self->extra->attrib = value;
Eli Benderskyef9683b2013-05-18 07:52:34 -07001970 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001971}
1972
1973static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001975 0, /* sq_concat */
1976 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001977 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001978 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001979 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001980 0,
1981};
1982
Eli Bendersky64d11e62012-06-15 07:42:50 +03001983/******************************* Element iterator ****************************/
1984
1985/* ElementIterObject represents the iteration state over an XML element in
1986 * pre-order traversal. To keep track of which sub-element should be returned
1987 * next, a stack of parents is maintained. This is a standard stack-based
1988 * iterative pre-order traversal of a tree.
1989 * The stack is managed using a single-linked list starting at parent_stack.
1990 * Each stack node contains the saved parent to which we should return after
1991 * the current one is exhausted, and the next child to examine in that parent.
1992 */
1993typedef struct ParentLocator_t {
1994 ElementObject *parent;
1995 Py_ssize_t child_index;
1996 struct ParentLocator_t *next;
1997} ParentLocator;
1998
1999typedef struct {
2000 PyObject_HEAD
2001 ParentLocator *parent_stack;
2002 ElementObject *root_element;
2003 PyObject *sought_tag;
2004 int root_done;
2005 int gettext;
2006} ElementIterObject;
2007
2008
2009static void
2010elementiter_dealloc(ElementIterObject *it)
2011{
2012 ParentLocator *p = it->parent_stack;
2013 while (p) {
2014 ParentLocator *temp = p;
2015 Py_XDECREF(p->parent);
2016 p = p->next;
2017 PyObject_Free(temp);
2018 }
2019
2020 Py_XDECREF(it->sought_tag);
2021 Py_XDECREF(it->root_element);
2022
2023 PyObject_GC_UnTrack(it);
2024 PyObject_GC_Del(it);
2025}
2026
2027static int
2028elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2029{
2030 ParentLocator *p = it->parent_stack;
2031 while (p) {
2032 Py_VISIT(p->parent);
2033 p = p->next;
2034 }
2035
2036 Py_VISIT(it->root_element);
2037 Py_VISIT(it->sought_tag);
2038 return 0;
2039}
2040
2041/* Helper function for elementiter_next. Add a new parent to the parent stack.
2042 */
2043static ParentLocator *
2044parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2045{
2046 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2047 if (new_node) {
2048 new_node->parent = parent;
2049 Py_INCREF(parent);
2050 new_node->child_index = 0;
2051 new_node->next = stack;
2052 }
2053 return new_node;
2054}
2055
2056static PyObject *
2057elementiter_next(ElementIterObject *it)
2058{
2059 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002060 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002061 * A short note on gettext: this function serves both the iter() and
2062 * itertext() methods to avoid code duplication. However, there are a few
2063 * small differences in the way these iterations work. Namely:
2064 * - itertext() only yields text from nodes that have it, and continues
2065 * iterating when a node doesn't have text (so it doesn't return any
2066 * node like iter())
2067 * - itertext() also has to handle tail, after finishing with all the
2068 * children of a node.
2069 */
Eli Bendersky113da642012-06-15 07:52:49 +03002070 ElementObject *cur_parent;
2071 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002072 int rc;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002073
2074 while (1) {
2075 /* Handle the case reached in the beginning and end of iteration, where
2076 * the parent stack is empty. The root_done flag gives us indication
2077 * whether we've just started iterating (so root_done is 0), in which
2078 * case the root is returned. If root_done is 1 and we're here, the
2079 * iterator is exhausted.
2080 */
2081 if (!it->parent_stack->parent) {
2082 if (it->root_done) {
2083 PyErr_SetNone(PyExc_StopIteration);
2084 return NULL;
2085 } else {
2086 it->parent_stack = parent_stack_push_new(it->parent_stack,
2087 it->root_element);
2088 if (!it->parent_stack) {
2089 PyErr_NoMemory();
2090 return NULL;
2091 }
2092
2093 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002094 rc = (it->sought_tag == Py_None);
2095 if (!rc) {
2096 rc = PyObject_RichCompareBool(it->root_element->tag,
2097 it->sought_tag, Py_EQ);
2098 if (rc < 0)
2099 return NULL;
2100 }
2101 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002103 PyObject *text = element_get_text(it->root_element);
2104 if (!text)
2105 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002106 rc = PyObject_IsTrue(text);
2107 if (rc < 0)
2108 return NULL;
2109 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002110 Py_INCREF(text);
2111 return text;
2112 }
2113 } else {
2114 Py_INCREF(it->root_element);
2115 return (PyObject *)it->root_element;
2116 }
2117 }
2118 }
2119 }
2120
2121 /* See if there are children left to traverse in the current parent. If
2122 * yes, visit the next child. If not, pop the stack and try again.
2123 */
Eli Bendersky113da642012-06-15 07:52:49 +03002124 cur_parent = it->parent_stack->parent;
2125 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002126 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2127 ElementObject *child = (ElementObject *)
2128 cur_parent->extra->children[child_index];
2129 it->parent_stack->child_index++;
2130 it->parent_stack = parent_stack_push_new(it->parent_stack,
2131 child);
2132 if (!it->parent_stack) {
2133 PyErr_NoMemory();
2134 return NULL;
2135 }
2136
2137 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002138 PyObject *text = element_get_text(child);
2139 if (!text)
2140 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002141 rc = PyObject_IsTrue(text);
2142 if (rc < 0)
2143 return NULL;
2144 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002145 Py_INCREF(text);
2146 return text;
2147 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002148 } else {
2149 rc = (it->sought_tag == Py_None);
2150 if (!rc) {
2151 rc = PyObject_RichCompareBool(child->tag,
2152 it->sought_tag, Py_EQ);
2153 if (rc < 0)
2154 return NULL;
2155 }
2156 if (rc) {
2157 Py_INCREF(child);
2158 return (PyObject *)child;
2159 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002161 }
2162 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002163 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002164 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002165 if (it->gettext) {
2166 tail = element_get_tail(cur_parent);
2167 if (!tail)
2168 return NULL;
2169 }
2170 else
2171 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002172 Py_XDECREF(it->parent_stack->parent);
2173 PyObject_Free(it->parent_stack);
2174 it->parent_stack = next;
2175
2176 /* Note that extra condition on it->parent_stack->parent here;
2177 * this is because itertext() is supposed to only return *inner*
2178 * text, not text following the element it began iteration with.
2179 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002180 if (it->parent_stack->parent) {
2181 rc = PyObject_IsTrue(tail);
2182 if (rc < 0)
2183 return NULL;
2184 if (rc) {
2185 Py_INCREF(tail);
2186 return tail;
2187 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 }
2189 }
2190 }
2191
2192 return NULL;
2193}
2194
2195
2196static PyTypeObject ElementIter_Type = {
2197 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002198 /* Using the module's name since the pure-Python implementation does not
2199 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200 "_elementtree._element_iterator", /* tp_name */
2201 sizeof(ElementIterObject), /* tp_basicsize */
2202 0, /* tp_itemsize */
2203 /* methods */
2204 (destructor)elementiter_dealloc, /* tp_dealloc */
2205 0, /* tp_print */
2206 0, /* tp_getattr */
2207 0, /* tp_setattr */
2208 0, /* tp_reserved */
2209 0, /* tp_repr */
2210 0, /* tp_as_number */
2211 0, /* tp_as_sequence */
2212 0, /* tp_as_mapping */
2213 0, /* tp_hash */
2214 0, /* tp_call */
2215 0, /* tp_str */
2216 0, /* tp_getattro */
2217 0, /* tp_setattro */
2218 0, /* tp_as_buffer */
2219 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2220 0, /* tp_doc */
2221 (traverseproc)elementiter_traverse, /* tp_traverse */
2222 0, /* tp_clear */
2223 0, /* tp_richcompare */
2224 0, /* tp_weaklistoffset */
2225 PyObject_SelfIter, /* tp_iter */
2226 (iternextfunc)elementiter_next, /* tp_iternext */
2227 0, /* tp_methods */
2228 0, /* tp_members */
2229 0, /* tp_getset */
2230 0, /* tp_base */
2231 0, /* tp_dict */
2232 0, /* tp_descr_get */
2233 0, /* tp_descr_set */
2234 0, /* tp_dictoffset */
2235 0, /* tp_init */
2236 0, /* tp_alloc */
2237 0, /* tp_new */
2238};
2239
2240
2241static PyObject *
2242create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2243{
2244 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002245
2246 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2247 if (!it)
2248 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002249
Victor Stinner4d463432013-07-11 23:05:03 +02002250 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002251 it->sought_tag = tag;
2252 it->root_done = 0;
2253 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002254 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002255 it->root_element = self;
2256
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002258
2259 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2260 if (it->parent_stack == NULL) {
2261 Py_DECREF(it);
2262 PyErr_NoMemory();
2263 return NULL;
2264 }
2265 it->parent_stack->parent = NULL;
2266 it->parent_stack->child_index = 0;
2267 it->parent_stack->next = NULL;
2268
Eli Bendersky64d11e62012-06-15 07:42:50 +03002269 return (PyObject *)it;
2270}
2271
2272
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002273/* ==================================================================== */
2274/* the tree builder type */
2275
2276typedef struct {
2277 PyObject_HEAD
2278
Eli Bendersky58d548d2012-05-29 15:45:16 +03002279 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002280
Antoine Pitrouee329312012-10-04 19:53:29 +02002281 PyObject *this; /* current node */
2282 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002283
Eli Bendersky58d548d2012-05-29 15:45:16 +03002284 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
Eli Bendersky58d548d2012-05-29 15:45:16 +03002286 PyObject *stack; /* element stack */
2287 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002288
Eli Bendersky48d358b2012-05-30 17:57:50 +03002289 PyObject *element_factory;
2290
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002291 /* element tracing */
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002292 PyObject *events_append; /* the append method of the list of events, or NULL */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002293 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2294 PyObject *end_event_obj;
2295 PyObject *start_ns_event_obj;
2296 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297} TreeBuilderObject;
2298
Christian Heimes90aa7642007-12-19 02:45:37 +00002299#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300
2301/* -------------------------------------------------------------------- */
2302/* constructor and destructor */
2303
Eli Bendersky58d548d2012-05-29 15:45:16 +03002304static PyObject *
2305treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002306{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002307 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2308 if (t != NULL) {
2309 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310
Eli Bendersky58d548d2012-05-29 15:45:16 +03002311 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002312 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002313 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002314 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002315
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002317 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002318 t->stack = PyList_New(20);
2319 if (!t->stack) {
2320 Py_DECREF(t->this);
2321 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002322 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 return NULL;
2324 }
2325 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002326
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002327 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002328 t->start_event_obj = t->end_event_obj = NULL;
2329 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2330 }
2331 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332}
2333
Serhiy Storchakacb985562015-05-04 15:32:48 +03002334/*[clinic input]
2335_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002336
Serhiy Storchakacb985562015-05-04 15:32:48 +03002337 element_factory: object = NULL
2338
2339[clinic start generated code]*/
2340
2341static int
2342_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2343 PyObject *element_factory)
2344/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2345{
2346 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002347
2348 if (element_factory) {
2349 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002350 tmp = self->element_factory;
2351 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002352 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002353 }
2354
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356}
2357
Eli Bendersky48d358b2012-05-30 17:57:50 +03002358static int
2359treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2360{
2361 Py_VISIT(self->root);
2362 Py_VISIT(self->this);
2363 Py_VISIT(self->last);
2364 Py_VISIT(self->data);
2365 Py_VISIT(self->stack);
2366 Py_VISIT(self->element_factory);
2367 return 0;
2368}
2369
2370static int
2371treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002372{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002373 Py_CLEAR(self->end_ns_event_obj);
2374 Py_CLEAR(self->start_ns_event_obj);
2375 Py_CLEAR(self->end_event_obj);
2376 Py_CLEAR(self->start_event_obj);
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002377 Py_CLEAR(self->events_append);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002378 Py_CLEAR(self->stack);
2379 Py_CLEAR(self->data);
2380 Py_CLEAR(self->last);
2381 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002383 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002384 return 0;
2385}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386
Eli Bendersky48d358b2012-05-30 17:57:50 +03002387static void
2388treebuilder_dealloc(TreeBuilderObject *self)
2389{
2390 PyObject_GC_UnTrack(self);
2391 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002392 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393}
2394
2395/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002396/* helpers for handling of arbitrary element-like objects */
2397
2398static int
2399treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2400 PyObject **dest, _Py_Identifier *name)
2401{
2402 if (Element_CheckExact(element)) {
2403 Py_DECREF(JOIN_OBJ(*dest));
2404 *dest = JOIN_SET(data, PyList_CheckExact(data));
2405 return 0;
2406 }
2407 else {
2408 PyObject *joined = list_join(data);
2409 int r;
2410 if (joined == NULL)
2411 return -1;
2412 r = _PyObject_SetAttrId(element, name, joined);
2413 Py_DECREF(joined);
2414 return r;
2415 }
2416}
2417
2418/* These two functions steal a reference to data */
2419static int
2420treebuilder_set_element_text(PyObject *element, PyObject *data)
2421{
2422 _Py_IDENTIFIER(text);
2423 return treebuilder_set_element_text_or_tail(
2424 element, data, &((ElementObject *) element)->text, &PyId_text);
2425}
2426
2427static int
2428treebuilder_set_element_tail(PyObject *element, PyObject *data)
2429{
2430 _Py_IDENTIFIER(tail);
2431 return treebuilder_set_element_text_or_tail(
2432 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2433}
2434
2435static int
2436treebuilder_add_subelement(PyObject *element, PyObject *child)
2437{
2438 _Py_IDENTIFIER(append);
2439 if (Element_CheckExact(element)) {
2440 ElementObject *elem = (ElementObject *) element;
2441 return element_add_subelement(elem, child);
2442 }
2443 else {
2444 PyObject *res;
2445 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2446 if (res == NULL)
2447 return -1;
2448 Py_DECREF(res);
2449 return 0;
2450 }
2451}
2452
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002453LOCAL(int)
2454treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2455 PyObject *node)
2456{
2457 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002458 PyObject *res;
2459 PyObject *event = PyTuple_Pack(2, action, node);
2460 if (event == NULL)
2461 return -1;
2462 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2463 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002464 if (res == NULL)
2465 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002466 Py_DECREF(res);
2467 }
2468 return 0;
2469}
2470
Antoine Pitrouee329312012-10-04 19:53:29 +02002471/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472/* handlers */
2473
2474LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2476 PyObject* attrib)
2477{
2478 PyObject* node;
2479 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002480 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481
2482 if (self->data) {
2483 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002484 if (treebuilder_set_element_text(self->last, self->data))
2485 return NULL;
2486 }
2487 else {
2488 if (treebuilder_set_element_tail(self->last, self->data))
2489 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 }
2491 self->data = NULL;
2492 }
2493
Eli Bendersky08231a92013-05-18 15:47:16 -07002494 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002495 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2496 } else {
2497 node = create_new_element(tag, attrib);
2498 }
2499 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002500 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002501 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502
Antoine Pitrouee329312012-10-04 19:53:29 +02002503 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504
2505 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002506 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002507 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508 } else {
2509 if (self->root) {
2510 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002511 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512 "multiple elements on top level"
2513 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002514 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515 }
2516 Py_INCREF(node);
2517 self->root = node;
2518 }
2519
2520 if (self->index < PyList_GET_SIZE(self->stack)) {
2521 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002522 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 Py_INCREF(this);
2524 } else {
2525 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002526 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527 }
2528 self->index++;
2529
2530 Py_DECREF(this);
2531 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002532 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002533
2534 Py_DECREF(self->last);
2535 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002536 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002538 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2539 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540
2541 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002542
2543 error:
2544 Py_DECREF(node);
2545 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546}
2547
2548LOCAL(PyObject*)
2549treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2550{
2551 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002552 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002553 /* ignore calls to data before the first call to start */
2554 Py_RETURN_NONE;
2555 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002556 /* store the first item as is */
2557 Py_INCREF(data); self->data = data;
2558 } else {
2559 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002560 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2561 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002562 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 /* expat often generates single character data sections; handle
2564 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002565 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2566 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002568 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002569 } else if (PyList_CheckExact(self->data)) {
2570 if (PyList_Append(self->data, data) < 0)
2571 return NULL;
2572 } else {
2573 PyObject* list = PyList_New(2);
2574 if (!list)
2575 return NULL;
2576 PyList_SET_ITEM(list, 0, self->data);
2577 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2578 self->data = list;
2579 }
2580 }
2581
2582 Py_RETURN_NONE;
2583}
2584
2585LOCAL(PyObject*)
2586treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2587{
2588 PyObject* item;
2589
2590 if (self->data) {
2591 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002592 if (treebuilder_set_element_text(self->last, self->data))
2593 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002595 if (treebuilder_set_element_tail(self->last, self->data))
2596 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002597 }
2598 self->data = NULL;
2599 }
2600
2601 if (self->index == 0) {
2602 PyErr_SetString(
2603 PyExc_IndexError,
2604 "pop from empty stack"
2605 );
2606 return NULL;
2607 }
2608
2609 self->index--;
2610
2611 item = PyList_GET_ITEM(self->stack, self->index);
2612 Py_INCREF(item);
2613
2614 Py_DECREF(self->last);
2615
Antoine Pitrouee329312012-10-04 19:53:29 +02002616 self->last = self->this;
2617 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002619 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2620 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621
2622 Py_INCREF(self->last);
2623 return (PyObject*) self->last;
2624}
2625
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002626/* -------------------------------------------------------------------- */
2627/* methods (in alphabetical order) */
2628
Serhiy Storchakacb985562015-05-04 15:32:48 +03002629/*[clinic input]
2630_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002631
Serhiy Storchakacb985562015-05-04 15:32:48 +03002632 data: object
2633 /
2634
2635[clinic start generated code]*/
2636
2637static PyObject *
2638_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2639/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2640{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002641 return treebuilder_handle_data(self, data);
2642}
2643
Serhiy Storchakacb985562015-05-04 15:32:48 +03002644/*[clinic input]
2645_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002646
Serhiy Storchakacb985562015-05-04 15:32:48 +03002647 tag: object
2648 /
2649
2650[clinic start generated code]*/
2651
2652static PyObject *
2653_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2654/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2655{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656 return treebuilder_handle_end(self, tag);
2657}
2658
2659LOCAL(PyObject*)
2660treebuilder_done(TreeBuilderObject* self)
2661{
2662 PyObject* res;
2663
2664 /* FIXME: check stack size? */
2665
2666 if (self->root)
2667 res = self->root;
2668 else
2669 res = Py_None;
2670
2671 Py_INCREF(res);
2672 return res;
2673}
2674
Serhiy Storchakacb985562015-05-04 15:32:48 +03002675/*[clinic input]
2676_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002677
Serhiy Storchakacb985562015-05-04 15:32:48 +03002678[clinic start generated code]*/
2679
2680static PyObject *
2681_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2682/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2683{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684 return treebuilder_done(self);
2685}
2686
Serhiy Storchakacb985562015-05-04 15:32:48 +03002687/*[clinic input]
2688_elementtree.TreeBuilder.start
2689
2690 tag: object
2691 attrs: object = None
2692 /
2693
2694[clinic start generated code]*/
2695
2696static PyObject *
2697_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2698 PyObject *attrs)
2699/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002701 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702}
2703
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704/* ==================================================================== */
2705/* the expat interface */
2706
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002709
2710/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2711 * cached globally without being in per-module state.
2712 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002713static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715
Eli Bendersky52467b12012-06-01 07:13:08 +03002716static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2717 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2718
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719typedef struct {
2720 PyObject_HEAD
2721
2722 XML_Parser parser;
2723
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002724 PyObject *target;
2725 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002727 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002729 PyObject *handle_start;
2730 PyObject *handle_data;
2731 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002733 PyObject *handle_comment;
2734 PyObject *handle_pi;
2735 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002737 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739} XMLParserObject;
2740
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002741static PyObject*
2742_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2743static PyObject *
2744_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2745 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002746
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747/* helpers */
2748
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749LOCAL(PyObject*)
2750makeuniversal(XMLParserObject* self, const char* string)
2751{
2752 /* convert a UTF-8 tag/attribute name from the expat parser
2753 to a universal name string */
2754
Antoine Pitrouc1948842012-10-01 23:40:37 +02002755 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756 PyObject* key;
2757 PyObject* value;
2758
2759 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002760 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002761 if (!key)
2762 return NULL;
2763
2764 value = PyDict_GetItem(self->names, key);
2765
2766 if (value) {
2767 Py_INCREF(value);
2768 } else {
2769 /* new name. convert to universal name, and decode as
2770 necessary */
2771
2772 PyObject* tag;
2773 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002774 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002775
2776 /* look for namespace separator */
2777 for (i = 0; i < size; i++)
2778 if (string[i] == '}')
2779 break;
2780 if (i != size) {
2781 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002782 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002783 if (tag == NULL) {
2784 Py_DECREF(key);
2785 return NULL;
2786 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002787 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 p[0] = '{';
2789 memcpy(p+1, string, size);
2790 size++;
2791 } else {
2792 /* plain name; use key as tag */
2793 Py_INCREF(key);
2794 tag = key;
2795 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002796
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002797 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002798 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002799 value = PyUnicode_DecodeUTF8(p, size, "strict");
2800 Py_DECREF(tag);
2801 if (!value) {
2802 Py_DECREF(key);
2803 return NULL;
2804 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805
2806 /* add to names dictionary */
2807 if (PyDict_SetItem(self->names, key, value) < 0) {
2808 Py_DECREF(key);
2809 Py_DECREF(value);
2810 return NULL;
2811 }
2812 }
2813
2814 Py_DECREF(key);
2815 return value;
2816}
2817
Eli Bendersky5b77d812012-03-16 08:20:05 +02002818/* Set the ParseError exception with the given parameters.
2819 * If message is not NULL, it's used as the error string. Otherwise, the
2820 * message string is the default for the given error_code.
2821*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002822static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002823expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2824 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002825{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002826 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002827 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002828
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002829 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002830 message ? message : EXPAT(ErrorString)(error_code),
2831 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002832 if (errmsg == NULL)
2833 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002834
Eli Bendersky532d03e2013-08-10 08:00:39 -07002835 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002836 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002837 if (!error)
2838 return;
2839
Eli Bendersky5b77d812012-03-16 08:20:05 +02002840 /* Add code and position attributes */
2841 code = PyLong_FromLong((long)error_code);
2842 if (!code) {
2843 Py_DECREF(error);
2844 return;
2845 }
2846 if (PyObject_SetAttrString(error, "code", code) == -1) {
2847 Py_DECREF(error);
2848 Py_DECREF(code);
2849 return;
2850 }
2851 Py_DECREF(code);
2852
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002853 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002854 if (!position) {
2855 Py_DECREF(error);
2856 return;
2857 }
2858 if (PyObject_SetAttrString(error, "position", position) == -1) {
2859 Py_DECREF(error);
2860 Py_DECREF(position);
2861 return;
2862 }
2863 Py_DECREF(position);
2864
Eli Bendersky532d03e2013-08-10 08:00:39 -07002865 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866 Py_DECREF(error);
2867}
2868
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002869/* -------------------------------------------------------------------- */
2870/* handlers */
2871
2872static void
2873expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2874 int data_len)
2875{
2876 PyObject* key;
2877 PyObject* value;
2878 PyObject* res;
2879
2880 if (data_len < 2 || data_in[0] != '&')
2881 return;
2882
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002883 if (PyErr_Occurred())
2884 return;
2885
Neal Norwitz0269b912007-08-08 06:56:02 +00002886 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002887 if (!key)
2888 return;
2889
2890 value = PyDict_GetItem(self->entity, key);
2891
2892 if (value) {
2893 if (TreeBuilder_CheckExact(self->target))
2894 res = treebuilder_handle_data(
2895 (TreeBuilderObject*) self->target, value
2896 );
2897 else if (self->handle_data)
2898 res = PyObject_CallFunction(self->handle_data, "O", value);
2899 else
2900 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002902 } else if (!PyErr_Occurred()) {
2903 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002904 char message[128] = "undefined entity ";
2905 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002906 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002907 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002909 EXPAT(GetErrorColumnNumber)(self->parser),
2910 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002911 );
2912 }
2913
2914 Py_DECREF(key);
2915}
2916
2917static void
2918expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2919 const XML_Char **attrib_in)
2920{
2921 PyObject* res;
2922 PyObject* tag;
2923 PyObject* attrib;
2924 int ok;
2925
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002926 if (PyErr_Occurred())
2927 return;
2928
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002929 /* tag name */
2930 tag = makeuniversal(self, tag_in);
2931 if (!tag)
2932 return; /* parser will look for errors */
2933
2934 /* attributes */
2935 if (attrib_in[0]) {
2936 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002937 if (!attrib) {
2938 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002939 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002940 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941 while (attrib_in[0] && attrib_in[1]) {
2942 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002943 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002944 if (!key || !value) {
2945 Py_XDECREF(value);
2946 Py_XDECREF(key);
2947 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002948 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002949 return;
2950 }
2951 ok = PyDict_SetItem(attrib, key, value);
2952 Py_DECREF(value);
2953 Py_DECREF(key);
2954 if (ok < 0) {
2955 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002956 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002957 return;
2958 }
2959 attrib_in += 2;
2960 }
2961 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002962 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002963 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002964 if (!attrib) {
2965 Py_DECREF(tag);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002966 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002967 }
Eli Bendersky48d358b2012-05-30 17:57:50 +03002968 }
2969
2970 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 /* shortcut */
2972 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2973 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002974 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002975 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002977 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002978 res = NULL;
2979
2980 Py_DECREF(tag);
2981 Py_DECREF(attrib);
2982
2983 Py_XDECREF(res);
2984}
2985
2986static void
2987expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2988 int data_len)
2989{
2990 PyObject* data;
2991 PyObject* res;
2992
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002993 if (PyErr_Occurred())
2994 return;
2995
Neal Norwitz0269b912007-08-08 06:56:02 +00002996 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002997 if (!data)
2998 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002999
3000 if (TreeBuilder_CheckExact(self->target))
3001 /* shortcut */
3002 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3003 else if (self->handle_data)
3004 res = PyObject_CallFunction(self->handle_data, "O", data);
3005 else
3006 res = NULL;
3007
3008 Py_DECREF(data);
3009
3010 Py_XDECREF(res);
3011}
3012
3013static void
3014expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3015{
3016 PyObject* tag;
3017 PyObject* res = NULL;
3018
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003019 if (PyErr_Occurred())
3020 return;
3021
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003022 if (TreeBuilder_CheckExact(self->target))
3023 /* shortcut */
3024 /* the standard tree builder doesn't look at the end tag */
3025 res = treebuilder_handle_end(
3026 (TreeBuilderObject*) self->target, Py_None
3027 );
3028 else if (self->handle_end) {
3029 tag = makeuniversal(self, tag_in);
3030 if (tag) {
3031 res = PyObject_CallFunction(self->handle_end, "O", tag);
3032 Py_DECREF(tag);
3033 }
3034 }
3035
3036 Py_XDECREF(res);
3037}
3038
3039static void
3040expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3041 const XML_Char *uri)
3042{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003043 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3044 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003045
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003046 if (PyErr_Occurred())
3047 return;
3048
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003049 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003050 return;
3051
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003052 if (!uri)
3053 uri = "";
3054 if (!prefix)
3055 prefix = "";
3056
3057 parcel = Py_BuildValue("ss", prefix, uri);
3058 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003059 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003060 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3061 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003062}
3063
3064static void
3065expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3066{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003067 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3068
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003069 if (PyErr_Occurred())
3070 return;
3071
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003072 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003073 return;
3074
3075 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003076}
3077
3078static void
3079expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3080{
3081 PyObject* comment;
3082 PyObject* res;
3083
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003084 if (PyErr_Occurred())
3085 return;
3086
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003088 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089 if (comment) {
3090 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3091 Py_XDECREF(res);
3092 Py_DECREF(comment);
3093 }
3094 }
3095}
3096
Eli Bendersky45839902013-01-13 05:14:47 -08003097static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003098expat_start_doctype_handler(XMLParserObject *self,
3099 const XML_Char *doctype_name,
3100 const XML_Char *sysid,
3101 const XML_Char *pubid,
3102 int has_internal_subset)
3103{
3104 PyObject *self_pyobj = (PyObject *)self;
3105 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3106 PyObject *parser_doctype = NULL;
3107 PyObject *res = NULL;
3108
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003109 if (PyErr_Occurred())
3110 return;
3111
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003112 doctype_name_obj = makeuniversal(self, doctype_name);
3113 if (!doctype_name_obj)
3114 return;
3115
3116 if (sysid) {
3117 sysid_obj = makeuniversal(self, sysid);
3118 if (!sysid_obj) {
3119 Py_DECREF(doctype_name_obj);
3120 return;
3121 }
3122 } else {
3123 Py_INCREF(Py_None);
3124 sysid_obj = Py_None;
3125 }
3126
3127 if (pubid) {
3128 pubid_obj = makeuniversal(self, pubid);
3129 if (!pubid_obj) {
3130 Py_DECREF(doctype_name_obj);
3131 Py_DECREF(sysid_obj);
3132 return;
3133 }
3134 } else {
3135 Py_INCREF(Py_None);
3136 pubid_obj = Py_None;
3137 }
3138
3139 /* If the target has a handler for doctype, call it. */
3140 if (self->handle_doctype) {
3141 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3142 doctype_name_obj, pubid_obj, sysid_obj);
3143 Py_CLEAR(res);
3144 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003145 else {
3146 /* Now see if the parser itself has a doctype method. If yes and it's
3147 * a custom method, call it but warn about deprecation. If it's only
3148 * the vanilla XMLParser method, do nothing.
3149 */
3150 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3151 if (parser_doctype &&
3152 !(PyCFunction_Check(parser_doctype) &&
3153 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3154 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003155 (PyCFunction) _elementtree_XMLParser_doctype)) {
3156 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3157 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003158 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003159 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003160 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003161 res = PyObject_CallFunction(parser_doctype, "OOO",
3162 doctype_name_obj, pubid_obj, sysid_obj);
3163 Py_CLEAR(res);
3164 }
3165 }
3166
3167clear:
3168 Py_XDECREF(parser_doctype);
3169 Py_DECREF(doctype_name_obj);
3170 Py_DECREF(pubid_obj);
3171 Py_DECREF(sysid_obj);
3172}
3173
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003174static void
3175expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3176 const XML_Char* data_in)
3177{
3178 PyObject* target;
3179 PyObject* data;
3180 PyObject* res;
3181
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003182 if (PyErr_Occurred())
3183 return;
3184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003186 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3187 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188 if (target && data) {
3189 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3190 Py_XDECREF(res);
3191 Py_DECREF(data);
3192 Py_DECREF(target);
3193 } else {
3194 Py_XDECREF(data);
3195 Py_XDECREF(target);
3196 }
3197 }
3198}
3199
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201
Eli Bendersky52467b12012-06-01 07:13:08 +03003202static PyObject *
3203xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204{
Eli Bendersky52467b12012-06-01 07:13:08 +03003205 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3206 if (self) {
3207 self->parser = NULL;
3208 self->target = self->entity = self->names = NULL;
3209 self->handle_start = self->handle_data = self->handle_end = NULL;
3210 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003211 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003213 return (PyObject *)self;
3214}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215
Serhiy Storchakacb985562015-05-04 15:32:48 +03003216/*[clinic input]
3217_elementtree.XMLParser.__init__
3218
3219 html: object = NULL
3220 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003221 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003222
3223[clinic start generated code]*/
3224
Eli Bendersky52467b12012-06-01 07:13:08 +03003225static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003226_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3227 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003228/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003229{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003230 self->entity = PyDict_New();
3231 if (!self->entity)
3232 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003233
Serhiy Storchakacb985562015-05-04 15:32:48 +03003234 self->names = PyDict_New();
3235 if (!self->names) {
3236 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003237 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003238 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003239
Serhiy Storchakacb985562015-05-04 15:32:48 +03003240 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3241 if (!self->parser) {
3242 Py_CLEAR(self->entity);
3243 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003245 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003246 }
3247
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 if (target) {
3249 Py_INCREF(target);
3250 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003251 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003252 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003253 Py_CLEAR(self->entity);
3254 Py_CLEAR(self->names);
3255 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003256 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003258 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003259 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260
Serhiy Storchakacb985562015-05-04 15:32:48 +03003261 self->handle_start = PyObject_GetAttrString(target, "start");
3262 self->handle_data = PyObject_GetAttrString(target, "data");
3263 self->handle_end = PyObject_GetAttrString(target, "end");
3264 self->handle_comment = PyObject_GetAttrString(target, "comment");
3265 self->handle_pi = PyObject_GetAttrString(target, "pi");
3266 self->handle_close = PyObject_GetAttrString(target, "close");
3267 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268
3269 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003270
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003272 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003274 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275 (XML_StartElementHandler) expat_start_handler,
3276 (XML_EndElementHandler) expat_end_handler
3277 );
3278 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 (XML_DefaultHandler) expat_default_handler
3281 );
3282 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003283 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 (XML_CharacterDataHandler) expat_data_handler
3285 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003286 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003289 (XML_CommentHandler) expat_comment_handler
3290 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003293 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294 (XML_ProcessingInstructionHandler) expat_pi_handler
3295 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003296 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003298 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3299 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003302 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304
Eli Bendersky52467b12012-06-01 07:13:08 +03003305 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306}
3307
Eli Bendersky52467b12012-06-01 07:13:08 +03003308static int
3309xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3310{
3311 Py_VISIT(self->handle_close);
3312 Py_VISIT(self->handle_pi);
3313 Py_VISIT(self->handle_comment);
3314 Py_VISIT(self->handle_end);
3315 Py_VISIT(self->handle_data);
3316 Py_VISIT(self->handle_start);
3317
3318 Py_VISIT(self->target);
3319 Py_VISIT(self->entity);
3320 Py_VISIT(self->names);
3321
3322 return 0;
3323}
3324
3325static int
3326xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327{
3328 EXPAT(ParserFree)(self->parser);
3329
Antoine Pitrouc1948842012-10-01 23:40:37 +02003330 Py_CLEAR(self->handle_close);
3331 Py_CLEAR(self->handle_pi);
3332 Py_CLEAR(self->handle_comment);
3333 Py_CLEAR(self->handle_end);
3334 Py_CLEAR(self->handle_data);
3335 Py_CLEAR(self->handle_start);
3336 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003337
Antoine Pitrouc1948842012-10-01 23:40:37 +02003338 Py_CLEAR(self->target);
3339 Py_CLEAR(self->entity);
3340 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003341
Eli Bendersky52467b12012-06-01 07:13:08 +03003342 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343}
3344
Eli Bendersky52467b12012-06-01 07:13:08 +03003345static void
3346xmlparser_dealloc(XMLParserObject* self)
3347{
3348 PyObject_GC_UnTrack(self);
3349 xmlparser_gc_clear(self);
3350 Py_TYPE(self)->tp_free((PyObject *)self);
3351}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352
3353LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003354expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003355{
3356 int ok;
3357
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003358 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003359 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3360
3361 if (PyErr_Occurred())
3362 return NULL;
3363
3364 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003365 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003366 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003368 EXPAT(GetErrorColumnNumber)(self->parser),
3369 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 );
3371 return NULL;
3372 }
3373
3374 Py_RETURN_NONE;
3375}
3376
Serhiy Storchakacb985562015-05-04 15:32:48 +03003377/*[clinic input]
3378_elementtree.XMLParser.close
3379
3380[clinic start generated code]*/
3381
3382static PyObject *
3383_elementtree_XMLParser_close_impl(XMLParserObject *self)
3384/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385{
3386 /* end feeding data to parser */
3387
3388 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003390 if (!res)
3391 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003393 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 Py_DECREF(res);
3395 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003396 }
3397 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003398 Py_DECREF(res);
3399 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003400 }
3401 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003402 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003403 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404}
3405
Serhiy Storchakacb985562015-05-04 15:32:48 +03003406/*[clinic input]
3407_elementtree.XMLParser.feed
3408
3409 data: object
3410 /
3411
3412[clinic start generated code]*/
3413
3414static PyObject *
3415_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3416/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003417{
3418 /* feed data to parser */
3419
Serhiy Storchakacb985562015-05-04 15:32:48 +03003420 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003421 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003422 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3423 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003424 return NULL;
3425 if (data_len > INT_MAX) {
3426 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3427 return NULL;
3428 }
3429 /* Explicitly set UTF-8 encoding. Return code ignored. */
3430 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003431 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003432 }
3433 else {
3434 Py_buffer view;
3435 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003436 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003437 return NULL;
3438 if (view.len > INT_MAX) {
3439 PyBuffer_Release(&view);
3440 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3441 return NULL;
3442 }
3443 res = expat_parse(self, view.buf, (int)view.len, 0);
3444 PyBuffer_Release(&view);
3445 return res;
3446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003447}
3448
Serhiy Storchakacb985562015-05-04 15:32:48 +03003449/*[clinic input]
3450_elementtree.XMLParser._parse_whole
3451
3452 file: object
3453 /
3454
3455[clinic start generated code]*/
3456
3457static PyObject *
3458_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3459/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003460{
Eli Benderskya3699232013-05-19 18:47:23 -07003461 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003462 PyObject* reader;
3463 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003464 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003465 PyObject* res;
3466
Serhiy Storchakacb985562015-05-04 15:32:48 +03003467 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003468 if (!reader)
3469 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003470
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471 /* read from open file object */
3472 for (;;) {
3473
3474 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3475
3476 if (!buffer) {
3477 /* read failed (e.g. due to KeyboardInterrupt) */
3478 Py_DECREF(reader);
3479 return NULL;
3480 }
3481
Eli Benderskyf996e772012-03-16 05:53:30 +02003482 if (PyUnicode_CheckExact(buffer)) {
3483 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003484 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003485 Py_DECREF(buffer);
3486 break;
3487 }
3488 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003489 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003490 if (!temp) {
3491 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003492 Py_DECREF(reader);
3493 return NULL;
3494 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003495 buffer = temp;
3496 }
3497 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 Py_DECREF(buffer);
3499 break;
3500 }
3501
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003502 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3503 Py_DECREF(buffer);
3504 Py_DECREF(reader);
3505 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3506 return NULL;
3507 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003509 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003510 );
3511
3512 Py_DECREF(buffer);
3513
3514 if (!res) {
3515 Py_DECREF(reader);
3516 return NULL;
3517 }
3518 Py_DECREF(res);
3519
3520 }
3521
3522 Py_DECREF(reader);
3523
3524 res = expat_parse(self, "", 0, 1);
3525
3526 if (res && TreeBuilder_CheckExact(self->target)) {
3527 Py_DECREF(res);
3528 return treebuilder_done((TreeBuilderObject*) self->target);
3529 }
3530
3531 return res;
3532}
3533
Serhiy Storchakacb985562015-05-04 15:32:48 +03003534/*[clinic input]
3535_elementtree.XMLParser.doctype
3536
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003537 name: object
3538 pubid: object
3539 system: object
3540 /
3541
Serhiy Storchakacb985562015-05-04 15:32:48 +03003542[clinic start generated code]*/
3543
3544static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003545_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3546 PyObject *pubid, PyObject *system)
3547/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003548{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003549 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3550 "This method of XMLParser is deprecated. Define"
3551 " doctype() method on the TreeBuilder target.",
3552 1) < 0) {
3553 return NULL;
3554 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003555 Py_RETURN_NONE;
3556}
3557
Serhiy Storchakacb985562015-05-04 15:32:48 +03003558/*[clinic input]
3559_elementtree.XMLParser._setevents
3560
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003561 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003562 events_to_report: object = None
3563 /
3564
3565[clinic start generated code]*/
3566
3567static PyObject *
3568_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3569 PyObject *events_queue,
3570 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003571/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003572{
3573 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003574 Py_ssize_t i, seqlen;
3575 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003576 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003577
3578 if (!TreeBuilder_CheckExact(self->target)) {
3579 PyErr_SetString(
3580 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003581 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582 "targets"
3583 );
3584 return NULL;
3585 }
3586
3587 target = (TreeBuilderObject*) self->target;
3588
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003589 events_append = PyObject_GetAttrString(events_queue, "append");
3590 if (events_append == NULL)
3591 return NULL;
3592 Py_XDECREF(target->events_append);
3593 target->events_append = events_append;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594
3595 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003596 Py_CLEAR(target->start_event_obj);
3597 Py_CLEAR(target->end_event_obj);
3598 Py_CLEAR(target->start_ns_event_obj);
3599 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003600
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003601 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003602 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003603 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604 Py_RETURN_NONE;
3605 }
3606
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003607 if (!(events_seq = PySequence_Fast(events_to_report,
3608 "events must be a sequence"))) {
3609 return NULL;
3610 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003611
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003612 seqlen = PySequence_Size(events_seq);
3613 for (i = 0; i < seqlen; ++i) {
3614 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3615 char *event_name = NULL;
3616 if (PyUnicode_Check(event_name_obj)) {
3617 event_name = _PyUnicode_AsString(event_name_obj);
3618 } else if (PyBytes_Check(event_name_obj)) {
3619 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003620 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003621
3622 if (event_name == NULL) {
3623 Py_DECREF(events_seq);
3624 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3625 return NULL;
3626 } else if (strcmp(event_name, "start") == 0) {
3627 Py_INCREF(event_name_obj);
3628 target->start_event_obj = event_name_obj;
3629 } else if (strcmp(event_name, "end") == 0) {
3630 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003632 target->end_event_obj = event_name_obj;
3633 } else if (strcmp(event_name, "start-ns") == 0) {
3634 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003635 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003636 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637 EXPAT(SetNamespaceDeclHandler)(
3638 self->parser,
3639 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3640 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3641 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003642 } else if (strcmp(event_name, "end-ns") == 0) {
3643 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003645 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646 EXPAT(SetNamespaceDeclHandler)(
3647 self->parser,
3648 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3649 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3650 );
3651 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003652 Py_DECREF(events_seq);
3653 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003654 return NULL;
3655 }
3656 }
3657
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003658 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003659 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003660}
3661
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003662static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003663xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003664{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003665 if (PyUnicode_Check(nameobj)) {
3666 PyObject* res;
3667 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3668 res = self->entity;
3669 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3670 res = self->target;
3671 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3672 return PyUnicode_FromFormat(
3673 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003674 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003675 }
3676 else
3677 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003678
Alexander Belopolskye239d232010-12-08 23:31:48 +00003679 Py_INCREF(res);
3680 return res;
3681 }
3682 generic:
3683 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684}
3685
Serhiy Storchakacb985562015-05-04 15:32:48 +03003686#include "clinic/_elementtree.c.h"
3687
3688static PyMethodDef element_methods[] = {
3689
3690 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3691
3692 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3693 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3694
3695 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3696 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3697 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3698
3699 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3700 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3701 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3702 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3703
3704 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3705 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3706 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3707
3708 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3709 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3710
3711 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3712 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3713
3714 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3715
3716 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3717 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3718 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3719 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3720 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3721
3722 {NULL, NULL}
3723};
3724
3725static PyMappingMethods element_as_mapping = {
3726 (lenfunc) element_length,
3727 (binaryfunc) element_subscr,
3728 (objobjargproc) element_ass_subscr,
3729};
3730
Serhiy Storchakadde08152015-11-25 15:28:13 +02003731static PyGetSetDef element_getsetlist[] = {
3732 {"tag",
3733 (getter)element_tag_getter,
3734 (setter)element_tag_setter,
3735 "A string identifying what kind of data this element represents"},
3736 {"text",
3737 (getter)element_text_getter,
3738 (setter)element_text_setter,
3739 "A string of text directly after the start tag, or None"},
3740 {"tail",
3741 (getter)element_tail_getter,
3742 (setter)element_tail_setter,
3743 "A string of text directly after the end tag, or None"},
3744 {"attrib",
3745 (getter)element_attrib_getter,
3746 (setter)element_attrib_setter,
3747 "A dictionary containing the element's attributes"},
3748 {NULL},
3749};
3750
Serhiy Storchakacb985562015-05-04 15:32:48 +03003751static PyTypeObject Element_Type = {
3752 PyVarObject_HEAD_INIT(NULL, 0)
3753 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3754 /* methods */
3755 (destructor)element_dealloc, /* tp_dealloc */
3756 0, /* tp_print */
3757 0, /* tp_getattr */
3758 0, /* tp_setattr */
3759 0, /* tp_reserved */
3760 (reprfunc)element_repr, /* tp_repr */
3761 0, /* tp_as_number */
3762 &element_as_sequence, /* tp_as_sequence */
3763 &element_as_mapping, /* tp_as_mapping */
3764 0, /* tp_hash */
3765 0, /* tp_call */
3766 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003767 PyObject_GenericGetAttr, /* tp_getattro */
3768 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003769 0, /* tp_as_buffer */
3770 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3771 /* tp_flags */
3772 0, /* tp_doc */
3773 (traverseproc)element_gc_traverse, /* tp_traverse */
3774 (inquiry)element_gc_clear, /* tp_clear */
3775 0, /* tp_richcompare */
3776 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3777 0, /* tp_iter */
3778 0, /* tp_iternext */
3779 element_methods, /* tp_methods */
3780 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003781 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003782 0, /* tp_base */
3783 0, /* tp_dict */
3784 0, /* tp_descr_get */
3785 0, /* tp_descr_set */
3786 0, /* tp_dictoffset */
3787 (initproc)element_init, /* tp_init */
3788 PyType_GenericAlloc, /* tp_alloc */
3789 element_new, /* tp_new */
3790 0, /* tp_free */
3791};
3792
3793static PyMethodDef treebuilder_methods[] = {
3794 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3795 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3796 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3797 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3798 {NULL, NULL}
3799};
3800
3801static PyTypeObject TreeBuilder_Type = {
3802 PyVarObject_HEAD_INIT(NULL, 0)
3803 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3804 /* methods */
3805 (destructor)treebuilder_dealloc, /* tp_dealloc */
3806 0, /* tp_print */
3807 0, /* tp_getattr */
3808 0, /* tp_setattr */
3809 0, /* tp_reserved */
3810 0, /* tp_repr */
3811 0, /* tp_as_number */
3812 0, /* tp_as_sequence */
3813 0, /* tp_as_mapping */
3814 0, /* tp_hash */
3815 0, /* tp_call */
3816 0, /* tp_str */
3817 0, /* tp_getattro */
3818 0, /* tp_setattro */
3819 0, /* tp_as_buffer */
3820 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3821 /* tp_flags */
3822 0, /* tp_doc */
3823 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3824 (inquiry)treebuilder_gc_clear, /* tp_clear */
3825 0, /* tp_richcompare */
3826 0, /* tp_weaklistoffset */
3827 0, /* tp_iter */
3828 0, /* tp_iternext */
3829 treebuilder_methods, /* tp_methods */
3830 0, /* tp_members */
3831 0, /* tp_getset */
3832 0, /* tp_base */
3833 0, /* tp_dict */
3834 0, /* tp_descr_get */
3835 0, /* tp_descr_set */
3836 0, /* tp_dictoffset */
3837 _elementtree_TreeBuilder___init__, /* tp_init */
3838 PyType_GenericAlloc, /* tp_alloc */
3839 treebuilder_new, /* tp_new */
3840 0, /* tp_free */
3841};
3842
3843static PyMethodDef xmlparser_methods[] = {
3844 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3845 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3846 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3847 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3848 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3849 {NULL, NULL}
3850};
3851
Neal Norwitz227b5332006-03-22 09:28:35 +00003852static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003853 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003854 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003855 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003856 (destructor)xmlparser_dealloc, /* tp_dealloc */
3857 0, /* tp_print */
3858 0, /* tp_getattr */
3859 0, /* tp_setattr */
3860 0, /* tp_reserved */
3861 0, /* tp_repr */
3862 0, /* tp_as_number */
3863 0, /* tp_as_sequence */
3864 0, /* tp_as_mapping */
3865 0, /* tp_hash */
3866 0, /* tp_call */
3867 0, /* tp_str */
3868 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3869 0, /* tp_setattro */
3870 0, /* tp_as_buffer */
3871 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3872 /* tp_flags */
3873 0, /* tp_doc */
3874 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3875 (inquiry)xmlparser_gc_clear, /* tp_clear */
3876 0, /* tp_richcompare */
3877 0, /* tp_weaklistoffset */
3878 0, /* tp_iter */
3879 0, /* tp_iternext */
3880 xmlparser_methods, /* tp_methods */
3881 0, /* tp_members */
3882 0, /* tp_getset */
3883 0, /* tp_base */
3884 0, /* tp_dict */
3885 0, /* tp_descr_get */
3886 0, /* tp_descr_set */
3887 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003888 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003889 PyType_GenericAlloc, /* tp_alloc */
3890 xmlparser_new, /* tp_new */
3891 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003892};
3893
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003894/* ==================================================================== */
3895/* python module interface */
3896
3897static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003898 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003899 {NULL, NULL}
3900};
3901
Martin v. Löwis1a214512008-06-11 05:26:20 +00003902
Eli Bendersky532d03e2013-08-10 08:00:39 -07003903static struct PyModuleDef elementtreemodule = {
3904 PyModuleDef_HEAD_INIT,
3905 "_elementtree",
3906 NULL,
3907 sizeof(elementtreestate),
3908 _functions,
3909 NULL,
3910 elementtree_traverse,
3911 elementtree_clear,
3912 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003913};
3914
Neal Norwitzf6657e62006-12-28 04:47:50 +00003915PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003916PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003917{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003918 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003919 elementtreestate *st;
3920
3921 m = PyState_FindModule(&elementtreemodule);
3922 if (m) {
3923 Py_INCREF(m);
3924 return m;
3925 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003926
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003927 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003928 if (PyType_Ready(&ElementIter_Type) < 0)
3929 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003930 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003931 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003932 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003933 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003934 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003935 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003936
Eli Bendersky532d03e2013-08-10 08:00:39 -07003937 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003938 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003939 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003940 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003941
Eli Bendersky828efde2012-04-05 05:40:58 +03003942 if (!(temp = PyImport_ImportModule("copy")))
3943 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003944 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003945 Py_XDECREF(temp);
3946
Eli Bendersky532d03e2013-08-10 08:00:39 -07003947 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003948 return NULL;
3949
Eli Bendersky20d41742012-06-01 09:48:37 +03003950 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003951 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3952 if (expat_capi) {
3953 /* check that it's usable */
3954 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003955 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003956 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3957 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003958 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003959 PyErr_SetString(PyExc_ImportError,
3960 "pyexpat version is incompatible");
3961 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003962 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003963 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003964 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003965 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003966
Eli Bendersky532d03e2013-08-10 08:00:39 -07003967 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003968 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003969 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003970 Py_INCREF(st->parseerror_obj);
3971 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003972
Eli Bendersky092af1f2012-03-04 07:14:03 +02003973 Py_INCREF((PyObject *)&Element_Type);
3974 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3975
Eli Bendersky58d548d2012-05-29 15:45:16 +03003976 Py_INCREF((PyObject *)&TreeBuilder_Type);
3977 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3978
Eli Bendersky52467b12012-06-01 07:13:08 +03003979 Py_INCREF((PyObject *)&XMLParser_Type);
3980 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003981
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003982 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003983}