blob: 85ffca206bab2d60a6ec80727fd73dd12f8fac4a [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing. Change the condition below to 1 to make
   ALLOC and RELEASE print a running byte total; as written, both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) declares a file-local helper returning
   `type`; when _MSC_VER is defined it additionally asks for __inline and
   __fastcall. */
#ifndef _MSC_VER
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The lowest bit of the text and tail pointers is used as a 'join' flag, so
   every use of those fields as object pointers must be wrapped in JOIN_OBJ.
   See the comments in the ElementObject definition for more info.

   JOIN_OBJ(p)       - the pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the pointer with its flag bit replaced by `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
/* Fetch the per-module state of a module object, which is assumed to be
 * _elementtree.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Look up the module instance imported in the currently running
 * sub-interpreter and fetch its state.
 */
#define ET_STATE_GLOBAL \
    ET_STATE(PyState_FindModule(&elementtreemodule))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
/* True when the type of op is exactly Element_Type. */
#define Element_CheckExact(op) (&Element_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700641 _clear_joined_ptr(&self->text);
642 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643
644 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300645 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 return 0;
649}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655
656 if (self->weakreflist != NULL)
657 PyObject_ClearWeakRefs((PyObject *) self);
658
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 /* element_gc_clear clears all references and deallocates extra
660 */
661 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
663 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200664 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665}
666
667/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669/*[clinic input]
670_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
Serhiy Storchakacb985562015-05-04 15:32:48 +0300672 subelement: object(subclass_of='&Element_Type')
673 /
674
675[clinic start generated code]*/
676
677static PyObject *
678_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
679/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
680{
681 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 return NULL;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element_clear_impl(ElementObject *self)
694/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
695{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300696 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_INCREF(Py_None);
699 Py_DECREF(JOIN_OBJ(self->text));
700 self->text = Py_None;
701
702 Py_INCREF(Py_None);
703 Py_DECREF(JOIN_OBJ(self->tail));
704 self->tail = Py_None;
705
706 Py_RETURN_NONE;
707}
708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709/*[clinic input]
710_elementtree.Element.__copy__
711
712[clinic start generated code]*/
713
714static PyObject *
715_elementtree_Element___copy___impl(ElementObject *self)
716/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200718 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 ElementObject* element;
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800722 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (!element)
724 return NULL;
725
726 Py_DECREF(JOIN_OBJ(element->text));
727 element->text = self->text;
728 Py_INCREF(JOIN_OBJ(element->text));
729
730 Py_DECREF(JOIN_OBJ(element->tail));
731 element->tail = self->tail;
732 Py_INCREF(JOIN_OBJ(element->tail));
733
734 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000735 if (element_resize(element, self->extra->length) < 0) {
736 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000738 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
740 for (i = 0; i < self->extra->length; i++) {
741 Py_INCREF(self->extra->children[i]);
742 element->extra->children[i] = self->extra->children[i];
743 }
744
745 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 }
747
748 return (PyObject*) element;
749}
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
754 memo: object
755 /
756
757[clinic start generated code]*/
758
759static PyObject *
760_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 Py_DECREF(element->text);
798 element->text = JOIN_SET(text, JOIN_GET(self->text));
799
800 tail = deepcopy(JOIN_OBJ(self->tail), memo);
801 if (!tail)
802 goto error;
803 Py_DECREF(element->tail);
804 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
805
806 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 if (element_resize(element, self->extra->length) < 0)
808 goto error;
809
810 for (i = 0; i < self->extra->length; i++) {
811 PyObject* child = deepcopy(self->extra->children[i], memo);
812 if (!child) {
813 element->extra->length = i;
814 goto error;
815 }
816 element->extra->children[i] = child;
817 }
818
819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200823 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchakacb985562015-05-04 15:32:48 +0300841/*[clinic input]
842_elementtree.Element.__sizeof__ -> Py_ssize_t
843
844[clinic start generated code]*/
845
846static Py_ssize_t
847_elementtree_Element___sizeof___impl(ElementObject *self)
848/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200849{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200850 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200851 if (self->extra) {
852 result += sizeof(ElementObjectExtra);
853 if (self->extra->children != self->extra->_children)
854 result += sizeof(PyObject*) * self->extra->allocated;
855 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300856 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200857}
858
/* Keys of the state dictionary used by __getstate__ and __setstate__. */
#define PICKLED_TAG      "tag"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TEXT     "text"
#define PICKLED_TAIL     "tail"
#define PICKLED_CHILDREN "_children"
865
866/* __getstate__ returns a fabricated instance dict as in the pure-Python
867 * Element implementation, for interoperability/interchangeability. This
868 * makes the pure-Python implementation details an API, but (a) there aren't
869 * any unnecessary structures there; and (b) it buys compatibility with 3.2
870 * pickles. See issue #16076.
871 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300872/*[clinic input]
873_elementtree.Element.__getstate__
874
875[clinic start generated code]*/
876
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300878_elementtree_Element___getstate___impl(ElementObject *self)
879/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200881 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 PyObject *instancedict = NULL, *children;
883
884 /* Build a list of children. */
885 children = PyList_New(self->extra ? self->extra->length : 0);
886 if (!children)
887 return NULL;
888 for (i = 0; i < PyList_GET_SIZE(children); i++) {
889 PyObject *child = self->extra->children[i];
890 Py_INCREF(child);
891 PyList_SET_ITEM(children, i, child);
892 }
893
894 /* Construct the state object. */
895 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
896 if (noattrib)
897 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
898 PICKLED_TAG, self->tag,
899 PICKLED_CHILDREN, children,
900 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 PICKLED_TEXT, JOIN_OBJ(self->text),
902 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903 else
904 instancedict = Py_BuildValue("{sOsOsOsOsO}",
905 PICKLED_TAG, self->tag,
906 PICKLED_CHILDREN, children,
907 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 PICKLED_TEXT, JOIN_OBJ(self->text),
909 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800910 if (instancedict) {
911 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800913 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 else {
915 for (i = 0; i < PyList_GET_SIZE(children); i++)
916 Py_DECREF(PyList_GET_ITEM(children, i));
917 Py_DECREF(children);
918
919 return NULL;
920 }
921}
922
923static PyObject *
924element_setstate_from_attributes(ElementObject *self,
925 PyObject *tag,
926 PyObject *attrib,
927 PyObject *text,
928 PyObject *tail,
929 PyObject *children)
930{
931 Py_ssize_t i, nchildren;
932
933 if (!tag) {
934 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
935 return NULL;
936 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200938 Py_INCREF(tag);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300939 Py_XSETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940
Eli Benderskydd3661e2013-09-13 06:24:25 -0700941 _clear_joined_ptr(&self->text);
942 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
943 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944
Eli Benderskydd3661e2013-09-13 06:24:25 -0700945 _clear_joined_ptr(&self->tail);
946 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
947 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800948
949 /* Handle ATTRIB and CHILDREN. */
950 if (!children && !attrib)
951 Py_RETURN_NONE;
952
953 /* Compute 'nchildren'. */
954 if (children) {
955 if (!PyList_Check(children)) {
956 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
957 return NULL;
958 }
959 nchildren = PyList_Size(children);
960 }
961 else {
962 nchildren = 0;
963 }
964
965 /* Allocate 'extra'. */
966 if (element_resize(self, nchildren)) {
967 return NULL;
968 }
969 assert(self->extra && self->extra->allocated >= nchildren);
970
971 /* Copy children */
972 for (i = 0; i < nchildren; i++) {
973 self->extra->children[i] = PyList_GET_ITEM(children, i);
974 Py_INCREF(self->extra->children[i]);
975 }
976
977 self->extra->length = nchildren;
978 self->extra->allocated = nchildren;
979
980 /* Stash attrib. */
981 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -0800982 Py_INCREF(attrib);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300983 Py_XSETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 }
985
986 Py_RETURN_NONE;
987}
988
989/* __setstate__ for Element instance from the Python implementation.
990 * 'state' should be the instance dict.
991 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300992
Eli Bendersky698bdb22013-01-10 06:01:06 -0800993static PyObject *
994element_setstate_from_Python(ElementObject *self, PyObject *state)
995{
996 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
997 PICKLED_TAIL, PICKLED_CHILDREN, 0};
998 PyObject *args;
999 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001000 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001001
Eli Bendersky698bdb22013-01-10 06:01:06 -08001002 tag = attrib = text = tail = children = NULL;
1003 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001004 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006
1007 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1008 &attrib, &text, &tail, &children))
1009 retval = element_setstate_from_attributes(self, tag, attrib, text,
1010 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001011 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001012 retval = NULL;
1013
1014 Py_DECREF(args);
1015 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001016}
1017
Serhiy Storchakacb985562015-05-04 15:32:48 +03001018/*[clinic input]
1019_elementtree.Element.__setstate__
1020
1021 state: object
1022 /
1023
1024[clinic start generated code]*/
1025
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001027_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1028/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029{
1030 if (!PyDict_CheckExact(state)) {
1031 PyErr_Format(PyExc_TypeError,
1032 "Don't know how to unpickle \"%.200R\" as an Element",
1033 state);
1034 return NULL;
1035 }
1036 else
1037 return element_setstate_from_Python(self, state);
1038}
1039
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001040LOCAL(int)
1041checkpath(PyObject* tag)
1042{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001043 Py_ssize_t i;
1044 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045
1046 /* check if a tag contains an xpath character */
1047
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001048#define PATHCHAR(ch) \
1049 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001050
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001051 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001052 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1053 void *data = PyUnicode_DATA(tag);
1054 unsigned int kind = PyUnicode_KIND(tag);
1055 for (i = 0; i < len; i++) {
1056 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1057 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001058 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001059 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 return 1;
1063 }
1064 return 0;
1065 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001066 if (PyBytes_Check(tag)) {
1067 char *p = PyBytes_AS_STRING(tag);
1068 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069 if (p[i] == '{')
1070 check = 0;
1071 else if (p[i] == '}')
1072 check = 1;
1073 else if (check && PATHCHAR(p[i]))
1074 return 1;
1075 }
1076 return 0;
1077 }
1078
1079 return 1; /* unknown type; might be path expression */
1080}
1081
Serhiy Storchakacb985562015-05-04 15:32:48 +03001082/*[clinic input]
1083_elementtree.Element.extend
1084
1085 elements: object
1086 /
1087
1088[clinic start generated code]*/
1089
1090static PyObject *
1091_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1092/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001093{
1094 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001095 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001096
Serhiy Storchakacb985562015-05-04 15:32:48 +03001097 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098 if (!seq) {
1099 PyErr_Format(
1100 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001101 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001102 );
1103 return NULL;
1104 }
1105
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001106 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001107 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001108 Py_INCREF(element);
1109 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001110 PyErr_Format(
1111 PyExc_TypeError,
1112 "expected an Element, not \"%.200s\"",
1113 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001114 Py_DECREF(seq);
1115 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001116 return NULL;
1117 }
1118
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119 if (element_add_subelement(self, element) < 0) {
1120 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001121 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122 return NULL;
1123 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001124 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125 }
1126
1127 Py_DECREF(seq);
1128
1129 Py_RETURN_NONE;
1130}
1131
Serhiy Storchakacb985562015-05-04 15:32:48 +03001132/*[clinic input]
1133_elementtree.Element.find
1134
1135 path: object
1136 namespaces: object = None
1137
1138[clinic start generated code]*/
1139
1140static PyObject *
1141_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1142 PyObject *namespaces)
1143/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001144{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001145 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001146 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001147
Serhiy Storchakacb985562015-05-04 15:32:48 +03001148 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001149 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001150 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001151 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001153 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154
1155 if (!self->extra)
1156 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001157
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158 for (i = 0; i < self->extra->length; i++) {
1159 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001160 int rc;
1161 if (!Element_CheckExact(item))
1162 continue;
1163 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001164 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001165 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 Py_DECREF(item);
1168 if (rc < 0)
1169 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170 }
1171
1172 Py_RETURN_NONE;
1173}
1174
Serhiy Storchakacb985562015-05-04 15:32:48 +03001175/*[clinic input]
1176_elementtree.Element.findtext
1177
1178 path: object
1179 default: object = None
1180 namespaces: object = None
1181
1182[clinic start generated code]*/
1183
1184static PyObject *
1185_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1186 PyObject *default_value,
1187 PyObject *namespaces)
1188/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001189{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001190 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001191 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001192 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001193
Serhiy Storchakacb985562015-05-04 15:32:48 +03001194 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001197 );
1198
1199 if (!self->extra) {
1200 Py_INCREF(default_value);
1201 return default_value;
1202 }
1203
1204 for (i = 0; i < self->extra->length; i++) {
1205 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001206 int rc;
1207 if (!Element_CheckExact(item))
1208 continue;
1209 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001210 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001211 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001212 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001213 if (text == Py_None) {
1214 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001215 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001216 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001217 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 return text;
1220 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 Py_DECREF(item);
1222 if (rc < 0)
1223 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 }
1225
1226 Py_INCREF(default_value);
1227 return default_value;
1228}
1229
Serhiy Storchakacb985562015-05-04 15:32:48 +03001230/*[clinic input]
1231_elementtree.Element.findall
1232
1233 path: object
1234 namespaces: object = None
1235
1236[clinic start generated code]*/
1237
1238static PyObject *
1239_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1240 PyObject *namespaces)
1241/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001243 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001245 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001246 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001247
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001248 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001249 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001250 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001251 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001253 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 out = PyList_New(0);
1256 if (!out)
1257 return NULL;
1258
1259 if (!self->extra)
1260 return out;
1261
1262 for (i = 0; i < self->extra->length; i++) {
1263 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001264 int rc;
1265 if (!Element_CheckExact(item))
1266 continue;
1267 Py_INCREF(item);
1268 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1269 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1270 Py_DECREF(item);
1271 Py_DECREF(out);
1272 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001274 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 }
1276
1277 return out;
1278}
1279
Serhiy Storchakacb985562015-05-04 15:32:48 +03001280/*[clinic input]
1281_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001282
Serhiy Storchakacb985562015-05-04 15:32:48 +03001283 path: object
1284 namespaces: object = None
1285
1286[clinic start generated code]*/
1287
1288static PyObject *
1289_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1290 PyObject *namespaces)
1291/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1292{
1293 PyObject* tag = path;
1294 _Py_IDENTIFIER(iterfind);
1295 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001296
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001297 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001298 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001299}
1300
Serhiy Storchakacb985562015-05-04 15:32:48 +03001301/*[clinic input]
1302_elementtree.Element.get
1303
1304 key: object
1305 default: object = None
1306
1307[clinic start generated code]*/
1308
1309static PyObject *
1310_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1311 PyObject *default_value)
1312/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313{
1314 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315
1316 if (!self->extra || self->extra->attrib == Py_None)
1317 value = default_value;
1318 else {
1319 value = PyDict_GetItem(self->extra->attrib, key);
1320 if (!value)
1321 value = default_value;
1322 }
1323
1324 Py_INCREF(value);
1325 return value;
1326}
1327
Serhiy Storchakacb985562015-05-04 15:32:48 +03001328/*[clinic input]
1329_elementtree.Element.getchildren
1330
1331[clinic start generated code]*/
1332
1333static PyObject *
1334_elementtree_Element_getchildren_impl(ElementObject *self)
1335/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001336{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001337 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338 PyObject* list;
1339
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001340 /* FIXME: report as deprecated? */
1341
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001342 if (!self->extra)
1343 return PyList_New(0);
1344
1345 list = PyList_New(self->extra->length);
1346 if (!list)
1347 return NULL;
1348
1349 for (i = 0; i < self->extra->length; i++) {
1350 PyObject* item = self->extra->children[i];
1351 Py_INCREF(item);
1352 PyList_SET_ITEM(list, i, item);
1353 }
1354
1355 return list;
1356}
1357
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001358
Eli Bendersky64d11e62012-06-15 07:42:50 +03001359static PyObject *
1360create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1361
1362
Serhiy Storchakacb985562015-05-04 15:32:48 +03001363/*[clinic input]
1364_elementtree.Element.iter
1365
1366 tag: object = None
1367
1368[clinic start generated code]*/
1369
Eli Bendersky64d11e62012-06-15 07:42:50 +03001370static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001371_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1372/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001373{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001374 if (PyUnicode_Check(tag)) {
1375 if (PyUnicode_READY(tag) < 0)
1376 return NULL;
1377 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1378 tag = Py_None;
1379 }
1380 else if (PyBytes_Check(tag)) {
1381 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1382 tag = Py_None;
1383 }
1384
Eli Bendersky64d11e62012-06-15 07:42:50 +03001385 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001386}
1387
1388
Serhiy Storchakacb985562015-05-04 15:32:48 +03001389/*[clinic input]
1390_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001391
Serhiy Storchakacb985562015-05-04 15:32:48 +03001392[clinic start generated code]*/
1393
1394static PyObject *
1395_elementtree_Element_itertext_impl(ElementObject *self)
1396/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1397{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001398 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399}
1400
Eli Bendersky64d11e62012-06-15 07:42:50 +03001401
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001403element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001405 ElementObject* self = (ElementObject*) self_;
1406
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001407 if (!self->extra || index < 0 || index >= self->extra->length) {
1408 PyErr_SetString(
1409 PyExc_IndexError,
1410 "child index out of range"
1411 );
1412 return NULL;
1413 }
1414
1415 Py_INCREF(self->extra->children[index]);
1416 return self->extra->children[index];
1417}
1418
Serhiy Storchakacb985562015-05-04 15:32:48 +03001419/*[clinic input]
1420_elementtree.Element.insert
1421
1422 index: Py_ssize_t
1423 subelement: object(subclass_of='&Element_Type')
1424 /
1425
1426[clinic start generated code]*/
1427
1428static PyObject *
1429_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1430 PyObject *subelement)
1431/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001433 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434
Victor Stinner5f0af232013-07-11 23:01:36 +02001435 if (!self->extra) {
1436 if (create_extra(self, NULL) < 0)
1437 return NULL;
1438 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001440 if (index < 0) {
1441 index += self->extra->length;
1442 if (index < 0)
1443 index = 0;
1444 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001445 if (index > self->extra->length)
1446 index = self->extra->length;
1447
1448 if (element_resize(self, 1) < 0)
1449 return NULL;
1450
1451 for (i = self->extra->length; i > index; i--)
1452 self->extra->children[i] = self->extra->children[i-1];
1453
Serhiy Storchakacb985562015-05-04 15:32:48 +03001454 Py_INCREF(subelement);
1455 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456
1457 self->extra->length++;
1458
1459 Py_RETURN_NONE;
1460}
1461
Serhiy Storchakacb985562015-05-04 15:32:48 +03001462/*[clinic input]
1463_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464
Serhiy Storchakacb985562015-05-04 15:32:48 +03001465[clinic start generated code]*/
1466
1467static PyObject *
1468_elementtree_Element_items_impl(ElementObject *self)
1469/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1470{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001471 if (!self->extra || self->extra->attrib == Py_None)
1472 return PyList_New(0);
1473
1474 return PyDict_Items(self->extra->attrib);
1475}
1476
Serhiy Storchakacb985562015-05-04 15:32:48 +03001477/*[clinic input]
1478_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001479
Serhiy Storchakacb985562015-05-04 15:32:48 +03001480[clinic start generated code]*/
1481
1482static PyObject *
1483_elementtree_Element_keys_impl(ElementObject *self)
1484/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1485{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486 if (!self->extra || self->extra->attrib == Py_None)
1487 return PyList_New(0);
1488
1489 return PyDict_Keys(self->extra->attrib);
1490}
1491
Martin v. Löwis18e16552006-02-15 17:27:45 +00001492static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493element_length(ElementObject* self)
1494{
1495 if (!self->extra)
1496 return 0;
1497
1498 return self->extra->length;
1499}
1500
Serhiy Storchakacb985562015-05-04 15:32:48 +03001501/*[clinic input]
1502_elementtree.Element.makeelement
1503
1504 tag: object
1505 attrib: object
1506 /
1507
1508[clinic start generated code]*/
1509
1510static PyObject *
1511_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1512 PyObject *attrib)
1513/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514{
1515 PyObject* elem;
1516
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001517 attrib = PyDict_Copy(attrib);
1518 if (!attrib)
1519 return NULL;
1520
Eli Bendersky092af1f2012-03-04 07:14:03 +02001521 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522
1523 Py_DECREF(attrib);
1524
1525 return elem;
1526}
1527
Serhiy Storchakacb985562015-05-04 15:32:48 +03001528/*[clinic input]
1529_elementtree.Element.remove
1530
1531 subelement: object(subclass_of='&Element_Type')
1532 /
1533
1534[clinic start generated code]*/
1535
1536static PyObject *
1537_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1538/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001540 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001541 int rc;
1542 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544 if (!self->extra) {
1545 /* element has no children, so raise exception */
1546 PyErr_SetString(
1547 PyExc_ValueError,
1548 "list.remove(x): x not in list"
1549 );
1550 return NULL;
1551 }
1552
1553 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001554 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001555 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001556 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001557 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001558 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001559 if (rc < 0)
1560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561 }
1562
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001563 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001564 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001565 PyErr_SetString(
1566 PyExc_ValueError,
1567 "list.remove(x): x not in list"
1568 );
1569 return NULL;
1570 }
1571
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001572 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573
1574 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575 for (; i < self->extra->length; i++)
1576 self->extra->children[i] = self->extra->children[i+1];
1577
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001578 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579 Py_RETURN_NONE;
1580}
1581
1582static PyObject*
1583element_repr(ElementObject* self)
1584{
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001585 int status;
1586
1587 if (self->tag == NULL)
Eli Bendersky092af1f2012-03-04 07:14:03 +02001588 return PyUnicode_FromFormat("<Element at %p>", self);
Serhiy Storchaka9062c262016-06-12 09:43:55 +03001589
1590 status = Py_ReprEnter((PyObject *)self);
1591 if (status == 0) {
1592 PyObject *res;
1593 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1594 Py_ReprLeave((PyObject *)self);
1595 return res;
1596 }
1597 if (status > 0)
1598 PyErr_Format(PyExc_RuntimeError,
1599 "reentrant call inside %s.__repr__",
1600 Py_TYPE(self)->tp_name);
1601 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602}
1603
Serhiy Storchakacb985562015-05-04 15:32:48 +03001604/*[clinic input]
1605_elementtree.Element.set
1606
1607 key: object
1608 value: object
1609 /
1610
1611[clinic start generated code]*/
1612
1613static PyObject *
1614_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1615 PyObject *value)
1616/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001617{
1618 PyObject* attrib;
1619
Victor Stinner5f0af232013-07-11 23:01:36 +02001620 if (!self->extra) {
1621 if (create_extra(self, NULL) < 0)
1622 return NULL;
1623 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001624
1625 attrib = element_get_attrib(self);
1626 if (!attrib)
1627 return NULL;
1628
1629 if (PyDict_SetItem(attrib, key, value) < 0)
1630 return NULL;
1631
1632 Py_RETURN_NONE;
1633}
1634
1635static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001636element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001637{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001638 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001639 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001640 PyObject* old;
1641
1642 if (!self->extra || index < 0 || index >= self->extra->length) {
1643 PyErr_SetString(
1644 PyExc_IndexError,
1645 "child assignment index out of range");
1646 return -1;
1647 }
1648
1649 old = self->extra->children[index];
1650
1651 if (item) {
1652 Py_INCREF(item);
1653 self->extra->children[index] = item;
1654 } else {
1655 self->extra->length--;
1656 for (i = index; i < self->extra->length; i++)
1657 self->extra->children[i] = self->extra->children[i+1];
1658 }
1659
1660 Py_DECREF(old);
1661
1662 return 0;
1663}
1664
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001665static PyObject*
1666element_subscr(PyObject* self_, PyObject* item)
1667{
1668 ElementObject* self = (ElementObject*) self_;
1669
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001670 if (PyIndex_Check(item)) {
1671 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001672
1673 if (i == -1 && PyErr_Occurred()) {
1674 return NULL;
1675 }
1676 if (i < 0 && self->extra)
1677 i += self->extra->length;
1678 return element_getitem(self_, i);
1679 }
1680 else if (PySlice_Check(item)) {
1681 Py_ssize_t start, stop, step, slicelen, cur, i;
1682 PyObject* list;
1683
1684 if (!self->extra)
1685 return PyList_New(0);
1686
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001687 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001688 self->extra->length,
1689 &start, &stop, &step, &slicelen) < 0) {
1690 return NULL;
1691 }
1692
1693 if (slicelen <= 0)
1694 return PyList_New(0);
1695 else {
1696 list = PyList_New(slicelen);
1697 if (!list)
1698 return NULL;
1699
1700 for (cur = start, i = 0; i < slicelen;
1701 cur += step, i++) {
1702 PyObject* item = self->extra->children[cur];
1703 Py_INCREF(item);
1704 PyList_SET_ITEM(list, i, item);
1705 }
1706
1707 return list;
1708 }
1709 }
1710 else {
1711 PyErr_SetString(PyExc_TypeError,
1712 "element indices must be integers");
1713 return NULL;
1714 }
1715}
1716
1717static int
1718element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1719{
1720 ElementObject* self = (ElementObject*) self_;
1721
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001722 if (PyIndex_Check(item)) {
1723 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001724
1725 if (i == -1 && PyErr_Occurred()) {
1726 return -1;
1727 }
1728 if (i < 0 && self->extra)
1729 i += self->extra->length;
1730 return element_setitem(self_, i, value);
1731 }
1732 else if (PySlice_Check(item)) {
1733 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1734
1735 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001736 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001737
Victor Stinner5f0af232013-07-11 23:01:36 +02001738 if (!self->extra) {
1739 if (create_extra(self, NULL) < 0)
1740 return -1;
1741 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001742
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001743 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001744 self->extra->length,
1745 &start, &stop, &step, &slicelen) < 0) {
1746 return -1;
1747 }
1748
Eli Bendersky865756a2012-03-09 13:38:15 +02001749 if (value == NULL) {
1750 /* Delete slice */
1751 size_t cur;
1752 Py_ssize_t i;
1753
1754 if (slicelen <= 0)
1755 return 0;
1756
1757 /* Since we're deleting, the direction of the range doesn't matter,
1758 * so for simplicity make it always ascending.
1759 */
1760 if (step < 0) {
1761 stop = start + 1;
1762 start = stop + step * (slicelen - 1) - 1;
1763 step = -step;
1764 }
1765
1766 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1767
1768 /* recycle is a list that will contain all the children
1769 * scheduled for removal.
1770 */
1771 if (!(recycle = PyList_New(slicelen))) {
1772 PyErr_NoMemory();
1773 return -1;
1774 }
1775
1776 /* This loop walks over all the children that have to be deleted,
1777 * with cur pointing at them. num_moved is the amount of children
1778 * until the next deleted child that have to be "shifted down" to
1779 * occupy the deleted's places.
1780 * Note that in the ith iteration, shifting is done i+i places down
1781 * because i children were already removed.
1782 */
1783 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1784 /* Compute how many children have to be moved, clipping at the
1785 * list end.
1786 */
1787 Py_ssize_t num_moved = step - 1;
1788 if (cur + step >= (size_t)self->extra->length) {
1789 num_moved = self->extra->length - cur - 1;
1790 }
1791
1792 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1793
1794 memmove(
1795 self->extra->children + cur - i,
1796 self->extra->children + cur + 1,
1797 num_moved * sizeof(PyObject *));
1798 }
1799
1800 /* Leftover "tail" after the last removed child */
1801 cur = start + (size_t)slicelen * step;
1802 if (cur < (size_t)self->extra->length) {
1803 memmove(
1804 self->extra->children + cur - slicelen,
1805 self->extra->children + cur,
1806 (self->extra->length - cur) * sizeof(PyObject *));
1807 }
1808
1809 self->extra->length -= slicelen;
1810
1811 /* Discard the recycle list with all the deleted sub-elements */
1812 Py_XDECREF(recycle);
1813 return 0;
1814 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001815
1816 /* A new slice is actually being assigned */
1817 seq = PySequence_Fast(value, "");
1818 if (!seq) {
1819 PyErr_Format(
1820 PyExc_TypeError,
1821 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1822 );
1823 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001824 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001825 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001826
1827 if (step != 1 && newlen != slicelen)
1828 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001829 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001830 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001831 "attempt to assign sequence of size %zd "
1832 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001833 newlen, slicelen
1834 );
1835 return -1;
1836 }
1837
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001838 /* Resize before creating the recycle bin, to prevent refleaks. */
1839 if (newlen > slicelen) {
1840 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001841 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001842 return -1;
1843 }
1844 }
1845
1846 if (slicelen > 0) {
1847 /* to avoid recursive calls to this method (via decref), move
1848 old items to the recycle bin here, and get rid of them when
1849 we're done modifying the element */
1850 recycle = PyList_New(slicelen);
1851 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001852 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001853 return -1;
1854 }
1855 for (cur = start, i = 0; i < slicelen;
1856 cur += step, i++)
1857 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1858 }
1859
1860 if (newlen < slicelen) {
1861 /* delete slice */
1862 for (i = stop; i < self->extra->length; i++)
1863 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1864 } else if (newlen > slicelen) {
1865 /* insert slice */
1866 for (i = self->extra->length-1; i >= stop; i--)
1867 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1868 }
1869
1870 /* replace the slice */
1871 for (cur = start, i = 0; i < newlen;
1872 cur += step, i++) {
1873 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1874 Py_INCREF(element);
1875 self->extra->children[cur] = element;
1876 }
1877
1878 self->extra->length += newlen - slicelen;
1879
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001880 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881
1882 /* discard the recycle bin, and everything in it */
1883 Py_XDECREF(recycle);
1884
1885 return 0;
1886 }
1887 else {
1888 PyErr_SetString(PyExc_TypeError,
1889 "element indices must be integers");
1890 return -1;
1891 }
1892}
1893
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001894static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001895element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001896{
1897 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001898 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001899
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001900 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001901 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001902
Alexander Belopolskye239d232010-12-08 23:31:48 +00001903 if (name == NULL)
1904 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001905
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001906 /* handle common attributes first */
1907 if (strcmp(name, "tag") == 0) {
1908 res = self->tag;
1909 Py_INCREF(res);
1910 return res;
1911 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001912 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001913 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001914 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001915 }
1916
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001917 /* methods */
1918 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1919 if (res)
1920 return res;
1921
1922 /* less common attributes */
1923 if (strcmp(name, "tail") == 0) {
1924 PyErr_Clear();
1925 res = element_get_tail(self);
1926 } else if (strcmp(name, "attrib") == 0) {
1927 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001928 if (!self->extra) {
1929 if (create_extra(self, NULL) < 0)
1930 return NULL;
1931 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001932 res = element_get_attrib(self);
1933 }
1934
1935 if (!res)
1936 return NULL;
1937
1938 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001939 return res;
1940}
1941
Eli Benderskyef9683b2013-05-18 07:52:34 -07001942static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001943element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001944{
Eli Benderskyb20df952012-05-20 06:33:29 +03001945 char *name = "";
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001946
1947 if (value == NULL) {
1948 PyErr_SetString(PyExc_AttributeError,
1949 "can't delete attribute");
1950 return -1;
1951 }
Eli Benderskyb20df952012-05-20 06:33:29 +03001952 if (PyUnicode_Check(nameobj))
1953 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001954 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001955 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001956
1957 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001958 Py_INCREF(value);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03001959 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001960 } else if (strcmp(name, "text") == 0) {
1961 Py_DECREF(JOIN_OBJ(self->text));
1962 self->text = value;
1963 Py_INCREF(self->text);
1964 } else if (strcmp(name, "tail") == 0) {
1965 Py_DECREF(JOIN_OBJ(self->tail));
1966 self->tail = value;
1967 Py_INCREF(self->tail);
1968 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001969 if (!self->extra) {
1970 if (create_extra(self, NULL) < 0)
1971 return -1;
1972 }
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001973 Py_INCREF(value);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03001974 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001975 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001976 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001977 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001978 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001979 }
1980
Eli Benderskyef9683b2013-05-18 07:52:34 -07001981 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001982}
1983
1984static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001985 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001986 0, /* sq_concat */
1987 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001988 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001989 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001990 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001991 0,
1992};
1993
Eli Bendersky64d11e62012-06-15 07:42:50 +03001994/******************************* Element iterator ****************************/
1995
1996/* ElementIterObject represents the iteration state over an XML element in
1997 * pre-order traversal. To keep track of which sub-element should be returned
1998 * next, a stack of parents is maintained. This is a standard stack-based
1999 * iterative pre-order traversal of a tree.
2000 * The stack is managed using a single-linked list starting at parent_stack.
2001 * Each stack node contains the saved parent to which we should return after
2002 * the current one is exhausted, and the next child to examine in that parent.
2003 */
2004typedef struct ParentLocator_t {
2005 ElementObject *parent;
2006 Py_ssize_t child_index;
2007 struct ParentLocator_t *next;
2008} ParentLocator;
2009
2010typedef struct {
2011 PyObject_HEAD
2012 ParentLocator *parent_stack;
2013 ElementObject *root_element;
2014 PyObject *sought_tag;
2015 int root_done;
2016 int gettext;
2017} ElementIterObject;
2018
2019
2020static void
2021elementiter_dealloc(ElementIterObject *it)
2022{
2023 ParentLocator *p = it->parent_stack;
2024 while (p) {
2025 ParentLocator *temp = p;
2026 Py_XDECREF(p->parent);
2027 p = p->next;
2028 PyObject_Free(temp);
2029 }
2030
2031 Py_XDECREF(it->sought_tag);
2032 Py_XDECREF(it->root_element);
2033
2034 PyObject_GC_UnTrack(it);
2035 PyObject_GC_Del(it);
2036}
2037
2038static int
2039elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2040{
2041 ParentLocator *p = it->parent_stack;
2042 while (p) {
2043 Py_VISIT(p->parent);
2044 p = p->next;
2045 }
2046
2047 Py_VISIT(it->root_element);
2048 Py_VISIT(it->sought_tag);
2049 return 0;
2050}
2051
2052/* Helper function for elementiter_next. Add a new parent to the parent stack.
2053 */
2054static ParentLocator *
2055parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2056{
2057 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2058 if (new_node) {
2059 new_node->parent = parent;
2060 Py_INCREF(parent);
2061 new_node->child_index = 0;
2062 new_node->next = stack;
2063 }
2064 return new_node;
2065}
2066
2067static PyObject *
2068elementiter_next(ElementIterObject *it)
2069{
2070 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002071 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002072 * A short note on gettext: this function serves both the iter() and
2073 * itertext() methods to avoid code duplication. However, there are a few
2074 * small differences in the way these iterations work. Namely:
2075 * - itertext() only yields text from nodes that have it, and continues
2076 * iterating when a node doesn't have text (so it doesn't return any
2077 * node like iter())
2078 * - itertext() also has to handle tail, after finishing with all the
2079 * children of a node.
2080 */
Eli Bendersky113da642012-06-15 07:52:49 +03002081 ElementObject *cur_parent;
2082 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002083 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002084 ElementObject *elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002085
2086 while (1) {
2087 /* Handle the case reached in the beginning and end of iteration, where
2088 * the parent stack is empty. The root_done flag gives us indication
2089 * whether we've just started iterating (so root_done is 0), in which
2090 * case the root is returned. If root_done is 1 and we're here, the
2091 * iterator is exhausted.
2092 */
2093 if (!it->parent_stack->parent) {
2094 if (it->root_done) {
2095 PyErr_SetNone(PyExc_StopIteration);
2096 return NULL;
2097 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002098 elem = it->root_element;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002099 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002100 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101 if (!it->parent_stack) {
2102 PyErr_NoMemory();
2103 return NULL;
2104 }
2105
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002106 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002107 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002108 rc = (it->sought_tag == Py_None);
2109 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002110 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002111 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002112 if (rc < 0) {
2113 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002114 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002115 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002116 }
2117 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002118 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002119 PyObject *text = element_get_text(elem);
2120 if (!text) {
2121 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002122 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002123 }
2124 Py_INCREF(text);
2125 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002126 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002127 if (rc > 0)
2128 return text;
2129 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002130 if (rc < 0)
2131 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002132 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002133 return (PyObject *)elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002134 }
2135 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002136 else {
2137 Py_DECREF(elem);
2138 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002139 }
2140 }
2141
2142 /* See if there are children left to traverse in the current parent. If
2143 * yes, visit the next child. If not, pop the stack and try again.
2144 */
Eli Bendersky113da642012-06-15 07:52:49 +03002145 cur_parent = it->parent_stack->parent;
2146 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002147 if (cur_parent->extra && child_index < cur_parent->extra->length) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002148 elem = (ElementObject *)cur_parent->extra->children[child_index];
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 it->parent_stack->child_index++;
2150 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002151 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152 if (!it->parent_stack) {
2153 PyErr_NoMemory();
2154 return NULL;
2155 }
2156
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002157 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002158 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002159 PyObject *text = element_get_text(elem);
2160 if (!text) {
2161 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002162 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002163 }
2164 Py_INCREF(text);
2165 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002166 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002167 if (rc > 0)
2168 return text;
2169 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002170 if (rc < 0)
2171 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002172 } else {
2173 rc = (it->sought_tag == Py_None);
2174 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002175 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002176 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002177 if (rc < 0) {
2178 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002179 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002180 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002181 }
2182 if (rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002183 return (PyObject *)elem;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002184 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002185 Py_DECREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002186 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002187 }
2188 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002189 PyObject *tail;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002190 ParentLocator *next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002191 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002192 Py_INCREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002193 tail = element_get_tail(cur_parent);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002194 if (!tail) {
2195 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002196 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002197 }
2198 Py_INCREF(tail);
2199 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002200 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002201 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002202 tail = Py_None;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002203 Py_INCREF(tail);
2204 }
2205 next = it->parent_stack->next;
2206 cur_parent = it->parent_stack->parent;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002207 PyObject_Free(it->parent_stack);
2208 it->parent_stack = next;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002209 Py_XDECREF(cur_parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002210
2211 /* Note that extra condition on it->parent_stack->parent here;
2212 * this is because itertext() is supposed to only return *inner*
2213 * text, not text following the element it began iteration with.
2214 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002215 if (it->parent_stack->parent) {
2216 rc = PyObject_IsTrue(tail);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002217 if (rc > 0)
2218 return tail;
2219 Py_DECREF(tail);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002220 if (rc < 0)
2221 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002222 }
2223 else {
2224 Py_DECREF(tail);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002225 }
2226 }
2227 }
2228
2229 return NULL;
2230}
2231
2232
2233static PyTypeObject ElementIter_Type = {
2234 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002235 /* Using the module's name since the pure-Python implementation does not
2236 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002237 "_elementtree._element_iterator", /* tp_name */
2238 sizeof(ElementIterObject), /* tp_basicsize */
2239 0, /* tp_itemsize */
2240 /* methods */
2241 (destructor)elementiter_dealloc, /* tp_dealloc */
2242 0, /* tp_print */
2243 0, /* tp_getattr */
2244 0, /* tp_setattr */
2245 0, /* tp_reserved */
2246 0, /* tp_repr */
2247 0, /* tp_as_number */
2248 0, /* tp_as_sequence */
2249 0, /* tp_as_mapping */
2250 0, /* tp_hash */
2251 0, /* tp_call */
2252 0, /* tp_str */
2253 0, /* tp_getattro */
2254 0, /* tp_setattro */
2255 0, /* tp_as_buffer */
2256 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2257 0, /* tp_doc */
2258 (traverseproc)elementiter_traverse, /* tp_traverse */
2259 0, /* tp_clear */
2260 0, /* tp_richcompare */
2261 0, /* tp_weaklistoffset */
2262 PyObject_SelfIter, /* tp_iter */
2263 (iternextfunc)elementiter_next, /* tp_iternext */
2264 0, /* tp_methods */
2265 0, /* tp_members */
2266 0, /* tp_getset */
2267 0, /* tp_base */
2268 0, /* tp_dict */
2269 0, /* tp_descr_get */
2270 0, /* tp_descr_set */
2271 0, /* tp_dictoffset */
2272 0, /* tp_init */
2273 0, /* tp_alloc */
2274 0, /* tp_new */
2275};
2276
2277
2278static PyObject *
2279create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2280{
2281 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002282
2283 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2284 if (!it)
2285 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002286
Victor Stinner4d463432013-07-11 23:05:03 +02002287 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002288 it->sought_tag = tag;
2289 it->root_done = 0;
2290 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002291 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002292 it->root_element = self;
2293
Eli Bendersky64d11e62012-06-15 07:42:50 +03002294 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002295
2296 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2297 if (it->parent_stack == NULL) {
2298 Py_DECREF(it);
2299 PyErr_NoMemory();
2300 return NULL;
2301 }
2302 it->parent_stack->parent = NULL;
2303 it->parent_stack->child_index = 0;
2304 it->parent_stack->next = NULL;
2305
Eli Bendersky64d11e62012-06-15 07:42:50 +03002306 return (PyObject *)it;
2307}
2308
2309
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310/* ==================================================================== */
2311/* the tree builder type */
2312
2313typedef struct {
2314 PyObject_HEAD
2315
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002317
Antoine Pitrouee329312012-10-04 19:53:29 +02002318 PyObject *this; /* current node */
2319 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002320
Eli Bendersky58d548d2012-05-29 15:45:16 +03002321 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002322
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 PyObject *stack; /* element stack */
2324 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325
Eli Bendersky48d358b2012-05-30 17:57:50 +03002326 PyObject *element_factory;
2327
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002329 PyObject *events; /* list of events, or NULL if not collecting */
2330 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2331 PyObject *end_event_obj;
2332 PyObject *start_ns_event_obj;
2333 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334} TreeBuilderObject;
2335
Christian Heimes90aa7642007-12-19 02:45:37 +00002336#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002337
2338/* -------------------------------------------------------------------- */
2339/* constructor and destructor */
2340
Eli Bendersky58d548d2012-05-29 15:45:16 +03002341static PyObject *
2342treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002343{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002344 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2345 if (t != NULL) {
2346 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002347
Eli Bendersky58d548d2012-05-29 15:45:16 +03002348 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002349 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002350 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002351 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002352
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002354 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 t->stack = PyList_New(20);
2356 if (!t->stack) {
2357 Py_DECREF(t->this);
2358 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002359 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002360 return NULL;
2361 }
2362 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002363
Eli Bendersky58d548d2012-05-29 15:45:16 +03002364 t->events = NULL;
2365 t->start_event_obj = t->end_event_obj = NULL;
2366 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2367 }
2368 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002369}
2370
Serhiy Storchakacb985562015-05-04 15:32:48 +03002371/*[clinic input]
2372_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002373
Serhiy Storchakacb985562015-05-04 15:32:48 +03002374 element_factory: object = NULL
2375
2376[clinic start generated code]*/
2377
2378static int
2379_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2380 PyObject *element_factory)
2381/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2382{
2383 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002384
2385 if (element_factory) {
2386 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002387 tmp = self->element_factory;
2388 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002389 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002390 }
2391
Eli Bendersky58d548d2012-05-29 15:45:16 +03002392 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393}
2394
Eli Bendersky48d358b2012-05-30 17:57:50 +03002395static int
2396treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2397{
2398 Py_VISIT(self->root);
2399 Py_VISIT(self->this);
2400 Py_VISIT(self->last);
2401 Py_VISIT(self->data);
2402 Py_VISIT(self->stack);
2403 Py_VISIT(self->element_factory);
2404 return 0;
2405}
2406
2407static int
2408treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002410 Py_CLEAR(self->end_ns_event_obj);
2411 Py_CLEAR(self->start_ns_event_obj);
2412 Py_CLEAR(self->end_event_obj);
2413 Py_CLEAR(self->start_event_obj);
2414 Py_CLEAR(self->events);
2415 Py_CLEAR(self->stack);
2416 Py_CLEAR(self->data);
2417 Py_CLEAR(self->last);
2418 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002419 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002420 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002421 return 0;
2422}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002423
Eli Bendersky48d358b2012-05-30 17:57:50 +03002424static void
2425treebuilder_dealloc(TreeBuilderObject *self)
2426{
2427 PyObject_GC_UnTrack(self);
2428 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002429 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002430}
2431
2432/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002433/* helpers for handling of arbitrary element-like objects */
2434
2435static int
2436treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2437 PyObject **dest, _Py_Identifier *name)
2438{
2439 if (Element_CheckExact(element)) {
2440 Py_DECREF(JOIN_OBJ(*dest));
2441 *dest = JOIN_SET(data, PyList_CheckExact(data));
2442 return 0;
2443 }
2444 else {
2445 PyObject *joined = list_join(data);
2446 int r;
2447 if (joined == NULL)
2448 return -1;
2449 r = _PyObject_SetAttrId(element, name, joined);
2450 Py_DECREF(joined);
2451 return r;
2452 }
2453}
2454
2455/* These two functions steal a reference to data */
2456static int
2457treebuilder_set_element_text(PyObject *element, PyObject *data)
2458{
2459 _Py_IDENTIFIER(text);
2460 return treebuilder_set_element_text_or_tail(
2461 element, data, &((ElementObject *) element)->text, &PyId_text);
2462}
2463
2464static int
2465treebuilder_set_element_tail(PyObject *element, PyObject *data)
2466{
2467 _Py_IDENTIFIER(tail);
2468 return treebuilder_set_element_text_or_tail(
2469 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2470}
2471
2472static int
2473treebuilder_add_subelement(PyObject *element, PyObject *child)
2474{
2475 _Py_IDENTIFIER(append);
2476 if (Element_CheckExact(element)) {
2477 ElementObject *elem = (ElementObject *) element;
2478 return element_add_subelement(elem, child);
2479 }
2480 else {
2481 PyObject *res;
2482 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2483 if (res == NULL)
2484 return -1;
2485 Py_DECREF(res);
2486 return 0;
2487 }
2488}
2489
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002490LOCAL(int)
2491treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2492 PyObject *node)
2493{
2494 if (action != NULL) {
2495 PyObject *res = PyTuple_Pack(2, action, node);
2496 if (res == NULL)
2497 return -1;
2498 if (PyList_Append(self->events, res) < 0) {
2499 Py_DECREF(res);
2500 return -1;
2501 }
2502 Py_DECREF(res);
2503 }
2504 return 0;
2505}
2506
Antoine Pitrouee329312012-10-04 19:53:29 +02002507/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508/* handlers */
2509
2510LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2512 PyObject* attrib)
2513{
2514 PyObject* node;
2515 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002516 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517
2518 if (self->data) {
2519 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002520 if (treebuilder_set_element_text(self->last, self->data))
2521 return NULL;
2522 }
2523 else {
2524 if (treebuilder_set_element_tail(self->last, self->data))
2525 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526 }
2527 self->data = NULL;
2528 }
2529
Eli Bendersky08231a92013-05-18 15:47:16 -07002530 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002531 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2532 } else {
2533 node = create_new_element(tag, attrib);
2534 }
2535 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002537 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538
Antoine Pitrouee329312012-10-04 19:53:29 +02002539 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540
2541 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002542 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002543 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544 } else {
2545 if (self->root) {
2546 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002547 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 "multiple elements on top level"
2549 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002550 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002551 }
2552 Py_INCREF(node);
2553 self->root = node;
2554 }
2555
2556 if (self->index < PyList_GET_SIZE(self->stack)) {
2557 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002558 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559 Py_INCREF(this);
2560 } else {
2561 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002562 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 }
2564 self->index++;
2565
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002567 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002568 Py_INCREF(node);
Serhiy Storchaka57a01d32016-04-10 18:05:40 +03002569 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002571 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2572 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573
2574 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002575
2576 error:
2577 Py_DECREF(node);
2578 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002579}
2580
2581LOCAL(PyObject*)
2582treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2583{
2584 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002585 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002586 /* ignore calls to data before the first call to start */
2587 Py_RETURN_NONE;
2588 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589 /* store the first item as is */
2590 Py_INCREF(data); self->data = data;
2591 } else {
2592 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002593 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2594 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002595 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 /* expat often generates single character data sections; handle
2597 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002598 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2599 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002600 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002601 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002602 } else if (PyList_CheckExact(self->data)) {
2603 if (PyList_Append(self->data, data) < 0)
2604 return NULL;
2605 } else {
2606 PyObject* list = PyList_New(2);
2607 if (!list)
2608 return NULL;
2609 PyList_SET_ITEM(list, 0, self->data);
2610 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2611 self->data = list;
2612 }
2613 }
2614
2615 Py_RETURN_NONE;
2616}
2617
2618LOCAL(PyObject*)
2619treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2620{
2621 PyObject* item;
2622
2623 if (self->data) {
2624 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002625 if (treebuilder_set_element_text(self->last, self->data))
2626 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002627 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002628 if (treebuilder_set_element_tail(self->last, self->data))
2629 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630 }
2631 self->data = NULL;
2632 }
2633
2634 if (self->index == 0) {
2635 PyErr_SetString(
2636 PyExc_IndexError,
2637 "pop from empty stack"
2638 );
2639 return NULL;
2640 }
2641
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002642 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002643 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002644 self->index--;
2645 self->this = PyList_GET_ITEM(self->stack, self->index);
2646 Py_INCREF(self->this);
2647 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002649 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2650 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002651
2652 Py_INCREF(self->last);
2653 return (PyObject*) self->last;
2654}
2655
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002656/* -------------------------------------------------------------------- */
2657/* methods (in alphabetical order) */
2658
Serhiy Storchakacb985562015-05-04 15:32:48 +03002659/*[clinic input]
2660_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002661
Serhiy Storchakacb985562015-05-04 15:32:48 +03002662 data: object
2663 /
2664
2665[clinic start generated code]*/
2666
2667static PyObject *
2668_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2669/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2670{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671 return treebuilder_handle_data(self, data);
2672}
2673
Serhiy Storchakacb985562015-05-04 15:32:48 +03002674/*[clinic input]
2675_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002676
Serhiy Storchakacb985562015-05-04 15:32:48 +03002677 tag: object
2678 /
2679
2680[clinic start generated code]*/
2681
2682static PyObject *
2683_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2684/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2685{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686 return treebuilder_handle_end(self, tag);
2687}
2688
2689LOCAL(PyObject*)
2690treebuilder_done(TreeBuilderObject* self)
2691{
2692 PyObject* res;
2693
2694 /* FIXME: check stack size? */
2695
2696 if (self->root)
2697 res = self->root;
2698 else
2699 res = Py_None;
2700
2701 Py_INCREF(res);
2702 return res;
2703}
2704
Serhiy Storchakacb985562015-05-04 15:32:48 +03002705/*[clinic input]
2706_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707
Serhiy Storchakacb985562015-05-04 15:32:48 +03002708[clinic start generated code]*/
2709
2710static PyObject *
2711_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2712/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2713{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714 return treebuilder_done(self);
2715}
2716
Serhiy Storchakacb985562015-05-04 15:32:48 +03002717/*[clinic input]
2718_elementtree.TreeBuilder.start
2719
2720 tag: object
2721 attrs: object = None
2722 /
2723
2724[clinic start generated code]*/
2725
2726static PyObject *
2727_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2728 PyObject *attrs)
2729/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002731 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732}
2733
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734/* ==================================================================== */
2735/* the expat interface */
2736
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002738#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002739
2740/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2741 * cached globally without being in per-module state.
2742 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002743static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745
Eli Bendersky52467b12012-06-01 07:13:08 +03002746static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2747 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2748
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749typedef struct {
2750 PyObject_HEAD
2751
2752 XML_Parser parser;
2753
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002754 PyObject *target;
2755 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002757 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002759 PyObject *handle_start;
2760 PyObject *handle_data;
2761 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002762
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002763 PyObject *handle_comment;
2764 PyObject *handle_pi;
2765 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002767 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002768
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002769} XMLParserObject;
2770
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002771static PyObject*
2772_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2773static PyObject *
2774_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2775 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002776
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002777/* helpers */
2778
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002779LOCAL(PyObject*)
2780makeuniversal(XMLParserObject* self, const char* string)
2781{
2782 /* convert a UTF-8 tag/attribute name from the expat parser
2783 to a universal name string */
2784
Antoine Pitrouc1948842012-10-01 23:40:37 +02002785 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002786 PyObject* key;
2787 PyObject* value;
2788
2789 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002790 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791 if (!key)
2792 return NULL;
2793
2794 value = PyDict_GetItem(self->names, key);
2795
2796 if (value) {
2797 Py_INCREF(value);
2798 } else {
2799 /* new name. convert to universal name, and decode as
2800 necessary */
2801
2802 PyObject* tag;
2803 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002804 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805
2806 /* look for namespace separator */
2807 for (i = 0; i < size; i++)
2808 if (string[i] == '}')
2809 break;
2810 if (i != size) {
2811 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002812 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002813 if (tag == NULL) {
2814 Py_DECREF(key);
2815 return NULL;
2816 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002817 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002818 p[0] = '{';
2819 memcpy(p+1, string, size);
2820 size++;
2821 } else {
2822 /* plain name; use key as tag */
2823 Py_INCREF(key);
2824 tag = key;
2825 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002826
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002827 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002828 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002829 value = PyUnicode_DecodeUTF8(p, size, "strict");
2830 Py_DECREF(tag);
2831 if (!value) {
2832 Py_DECREF(key);
2833 return NULL;
2834 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002835
2836 /* add to names dictionary */
2837 if (PyDict_SetItem(self->names, key, value) < 0) {
2838 Py_DECREF(key);
2839 Py_DECREF(value);
2840 return NULL;
2841 }
2842 }
2843
2844 Py_DECREF(key);
2845 return value;
2846}
2847
Eli Bendersky5b77d812012-03-16 08:20:05 +02002848/* Set the ParseError exception with the given parameters.
2849 * If message is not NULL, it's used as the error string. Otherwise, the
2850 * message string is the default for the given error_code.
2851*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002852static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002853expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2854 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002855{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002856 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002857 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002858
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002859 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002860 message ? message : EXPAT(ErrorString)(error_code),
2861 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002862 if (errmsg == NULL)
2863 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002864
Eli Bendersky532d03e2013-08-10 08:00:39 -07002865 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002866 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002867 if (!error)
2868 return;
2869
Eli Bendersky5b77d812012-03-16 08:20:05 +02002870 /* Add code and position attributes */
2871 code = PyLong_FromLong((long)error_code);
2872 if (!code) {
2873 Py_DECREF(error);
2874 return;
2875 }
2876 if (PyObject_SetAttrString(error, "code", code) == -1) {
2877 Py_DECREF(error);
2878 Py_DECREF(code);
2879 return;
2880 }
2881 Py_DECREF(code);
2882
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002883 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002884 if (!position) {
2885 Py_DECREF(error);
2886 return;
2887 }
2888 if (PyObject_SetAttrString(error, "position", position) == -1) {
2889 Py_DECREF(error);
2890 Py_DECREF(position);
2891 return;
2892 }
2893 Py_DECREF(position);
2894
Eli Bendersky532d03e2013-08-10 08:00:39 -07002895 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002896 Py_DECREF(error);
2897}
2898
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002899/* -------------------------------------------------------------------- */
2900/* handlers */
2901
2902static void
2903expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2904 int data_len)
2905{
2906 PyObject* key;
2907 PyObject* value;
2908 PyObject* res;
2909
2910 if (data_len < 2 || data_in[0] != '&')
2911 return;
2912
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002913 if (PyErr_Occurred())
2914 return;
2915
Neal Norwitz0269b912007-08-08 06:56:02 +00002916 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002917 if (!key)
2918 return;
2919
2920 value = PyDict_GetItem(self->entity, key);
2921
2922 if (value) {
2923 if (TreeBuilder_CheckExact(self->target))
2924 res = treebuilder_handle_data(
2925 (TreeBuilderObject*) self->target, value
2926 );
2927 else if (self->handle_data)
2928 res = PyObject_CallFunction(self->handle_data, "O", value);
2929 else
2930 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002932 } else if (!PyErr_Occurred()) {
2933 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002934 char message[128] = "undefined entity ";
2935 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002936 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002937 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002939 EXPAT(GetErrorColumnNumber)(self->parser),
2940 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002941 );
2942 }
2943
2944 Py_DECREF(key);
2945}
2946
2947static void
2948expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2949 const XML_Char **attrib_in)
2950{
2951 PyObject* res;
2952 PyObject* tag;
2953 PyObject* attrib;
2954 int ok;
2955
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002956 if (PyErr_Occurred())
2957 return;
2958
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002959 /* tag name */
2960 tag = makeuniversal(self, tag_in);
2961 if (!tag)
2962 return; /* parser will look for errors */
2963
2964 /* attributes */
2965 if (attrib_in[0]) {
2966 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002967 if (!attrib) {
2968 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002969 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002970 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 while (attrib_in[0] && attrib_in[1]) {
2972 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002973 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 if (!key || !value) {
2975 Py_XDECREF(value);
2976 Py_XDECREF(key);
2977 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002978 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002979 return;
2980 }
2981 ok = PyDict_SetItem(attrib, key, value);
2982 Py_DECREF(value);
2983 Py_DECREF(key);
2984 if (ok < 0) {
2985 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002986 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 return;
2988 }
2989 attrib_in += 2;
2990 }
2991 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002992 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002993 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002994 if (!attrib) {
2995 Py_DECREF(tag);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002996 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002997 }
Eli Bendersky48d358b2012-05-30 17:57:50 +03002998 }
2999
3000 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003001 /* shortcut */
3002 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3003 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003004 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003005 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003007 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003008 res = NULL;
3009
3010 Py_DECREF(tag);
3011 Py_DECREF(attrib);
3012
3013 Py_XDECREF(res);
3014}
3015
3016static void
3017expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3018 int data_len)
3019{
3020 PyObject* data;
3021 PyObject* res;
3022
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003023 if (PyErr_Occurred())
3024 return;
3025
Neal Norwitz0269b912007-08-08 06:56:02 +00003026 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003027 if (!data)
3028 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003029
3030 if (TreeBuilder_CheckExact(self->target))
3031 /* shortcut */
3032 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3033 else if (self->handle_data)
3034 res = PyObject_CallFunction(self->handle_data, "O", data);
3035 else
3036 res = NULL;
3037
3038 Py_DECREF(data);
3039
3040 Py_XDECREF(res);
3041}
3042
3043static void
3044expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3045{
3046 PyObject* tag;
3047 PyObject* res = NULL;
3048
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003049 if (PyErr_Occurred())
3050 return;
3051
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003052 if (TreeBuilder_CheckExact(self->target))
3053 /* shortcut */
3054 /* the standard tree builder doesn't look at the end tag */
3055 res = treebuilder_handle_end(
3056 (TreeBuilderObject*) self->target, Py_None
3057 );
3058 else if (self->handle_end) {
3059 tag = makeuniversal(self, tag_in);
3060 if (tag) {
3061 res = PyObject_CallFunction(self->handle_end, "O", tag);
3062 Py_DECREF(tag);
3063 }
3064 }
3065
3066 Py_XDECREF(res);
3067}
3068
3069static void
3070expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3071 const XML_Char *uri)
3072{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003073 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3074 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003075
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003076 if (PyErr_Occurred())
3077 return;
3078
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003079 if (!target->events || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003080 return;
3081
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003082 if (!uri)
3083 uri = "";
3084 if (!prefix)
3085 prefix = "";
3086
3087 parcel = Py_BuildValue("ss", prefix, uri);
3088 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003089 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003090 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3091 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003092}
3093
3094static void
3095expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3096{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003097 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3098
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003099 if (PyErr_Occurred())
3100 return;
3101
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003102 if (!target->events)
3103 return;
3104
3105 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106}
3107
3108static void
3109expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3110{
3111 PyObject* comment;
3112 PyObject* res;
3113
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003114 if (PyErr_Occurred())
3115 return;
3116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003118 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003119 if (comment) {
3120 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3121 Py_XDECREF(res);
3122 Py_DECREF(comment);
3123 }
3124 }
3125}
3126
Eli Bendersky45839902013-01-13 05:14:47 -08003127static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003128expat_start_doctype_handler(XMLParserObject *self,
3129 const XML_Char *doctype_name,
3130 const XML_Char *sysid,
3131 const XML_Char *pubid,
3132 int has_internal_subset)
3133{
3134 PyObject *self_pyobj = (PyObject *)self;
3135 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3136 PyObject *parser_doctype = NULL;
3137 PyObject *res = NULL;
3138
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003139 if (PyErr_Occurred())
3140 return;
3141
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003142 doctype_name_obj = makeuniversal(self, doctype_name);
3143 if (!doctype_name_obj)
3144 return;
3145
3146 if (sysid) {
3147 sysid_obj = makeuniversal(self, sysid);
3148 if (!sysid_obj) {
3149 Py_DECREF(doctype_name_obj);
3150 return;
3151 }
3152 } else {
3153 Py_INCREF(Py_None);
3154 sysid_obj = Py_None;
3155 }
3156
3157 if (pubid) {
3158 pubid_obj = makeuniversal(self, pubid);
3159 if (!pubid_obj) {
3160 Py_DECREF(doctype_name_obj);
3161 Py_DECREF(sysid_obj);
3162 return;
3163 }
3164 } else {
3165 Py_INCREF(Py_None);
3166 pubid_obj = Py_None;
3167 }
3168
3169 /* If the target has a handler for doctype, call it. */
3170 if (self->handle_doctype) {
3171 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3172 doctype_name_obj, pubid_obj, sysid_obj);
3173 Py_CLEAR(res);
3174 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003175 else {
3176 /* Now see if the parser itself has a doctype method. If yes and it's
3177 * a custom method, call it but warn about deprecation. If it's only
3178 * the vanilla XMLParser method, do nothing.
3179 */
3180 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3181 if (parser_doctype &&
3182 !(PyCFunction_Check(parser_doctype) &&
3183 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3184 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003185 (PyCFunction) _elementtree_XMLParser_doctype)) {
3186 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3187 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003188 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003189 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003190 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003191 res = PyObject_CallFunction(parser_doctype, "OOO",
3192 doctype_name_obj, pubid_obj, sysid_obj);
3193 Py_CLEAR(res);
3194 }
3195 }
3196
3197clear:
3198 Py_XDECREF(parser_doctype);
3199 Py_DECREF(doctype_name_obj);
3200 Py_DECREF(pubid_obj);
3201 Py_DECREF(sysid_obj);
3202}
3203
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204static void
3205expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3206 const XML_Char* data_in)
3207{
3208 PyObject* target;
3209 PyObject* data;
3210 PyObject* res;
3211
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003212 if (PyErr_Occurred())
3213 return;
3214
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003216 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3217 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218 if (target && data) {
3219 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3220 Py_XDECREF(res);
3221 Py_DECREF(data);
3222 Py_DECREF(target);
3223 } else {
3224 Py_XDECREF(data);
3225 Py_XDECREF(target);
3226 }
3227 }
3228}
3229
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003230/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231
Eli Bendersky52467b12012-06-01 07:13:08 +03003232static PyObject *
3233xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234{
Eli Bendersky52467b12012-06-01 07:13:08 +03003235 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3236 if (self) {
3237 self->parser = NULL;
3238 self->target = self->entity = self->names = NULL;
3239 self->handle_start = self->handle_data = self->handle_end = NULL;
3240 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003241 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003243 return (PyObject *)self;
3244}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245
Serhiy Storchakacb985562015-05-04 15:32:48 +03003246/*[clinic input]
3247_elementtree.XMLParser.__init__
3248
3249 html: object = NULL
3250 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003251 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003252
3253[clinic start generated code]*/
3254
Eli Bendersky52467b12012-06-01 07:13:08 +03003255static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003256_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3257 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003258/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003259{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003260 self->entity = PyDict_New();
3261 if (!self->entity)
3262 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263
Serhiy Storchakacb985562015-05-04 15:32:48 +03003264 self->names = PyDict_New();
3265 if (!self->names) {
3266 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003267 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003269
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3271 if (!self->parser) {
3272 Py_CLEAR(self->entity);
3273 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003276 }
3277
Eli Bendersky52467b12012-06-01 07:13:08 +03003278 if (target) {
3279 Py_INCREF(target);
3280 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003281 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003283 Py_CLEAR(self->entity);
3284 Py_CLEAR(self->names);
3285 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003286 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003288 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003289 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 self->handle_start = PyObject_GetAttrString(target, "start");
3292 self->handle_data = PyObject_GetAttrString(target, "data");
3293 self->handle_end = PyObject_GetAttrString(target, "end");
3294 self->handle_comment = PyObject_GetAttrString(target, "comment");
3295 self->handle_pi = PyObject_GetAttrString(target, "pi");
3296 self->handle_close = PyObject_GetAttrString(target, "close");
3297 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298
3299 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003300
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 (XML_StartElementHandler) expat_start_handler,
3306 (XML_EndElementHandler) expat_end_handler
3307 );
3308 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003309 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003310 (XML_DefaultHandler) expat_default_handler
3311 );
3312 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003313 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314 (XML_CharacterDataHandler) expat_data_handler
3315 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003316 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 (XML_CommentHandler) expat_comment_handler
3320 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003321 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003322 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003323 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 (XML_ProcessingInstructionHandler) expat_pi_handler
3325 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003326 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003327 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003328 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3329 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003331 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003332 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003334
Eli Bendersky52467b12012-06-01 07:13:08 +03003335 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336}
3337
Eli Bendersky52467b12012-06-01 07:13:08 +03003338static int
3339xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3340{
3341 Py_VISIT(self->handle_close);
3342 Py_VISIT(self->handle_pi);
3343 Py_VISIT(self->handle_comment);
3344 Py_VISIT(self->handle_end);
3345 Py_VISIT(self->handle_data);
3346 Py_VISIT(self->handle_start);
3347
3348 Py_VISIT(self->target);
3349 Py_VISIT(self->entity);
3350 Py_VISIT(self->names);
3351
3352 return 0;
3353}
3354
3355static int
3356xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003357{
3358 EXPAT(ParserFree)(self->parser);
3359
Antoine Pitrouc1948842012-10-01 23:40:37 +02003360 Py_CLEAR(self->handle_close);
3361 Py_CLEAR(self->handle_pi);
3362 Py_CLEAR(self->handle_comment);
3363 Py_CLEAR(self->handle_end);
3364 Py_CLEAR(self->handle_data);
3365 Py_CLEAR(self->handle_start);
3366 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367
Antoine Pitrouc1948842012-10-01 23:40:37 +02003368 Py_CLEAR(self->target);
3369 Py_CLEAR(self->entity);
3370 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003371
Eli Bendersky52467b12012-06-01 07:13:08 +03003372 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373}
3374
Eli Bendersky52467b12012-06-01 07:13:08 +03003375static void
3376xmlparser_dealloc(XMLParserObject* self)
3377{
3378 PyObject_GC_UnTrack(self);
3379 xmlparser_gc_clear(self);
3380 Py_TYPE(self)->tp_free((PyObject *)self);
3381}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003382
3383LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003384expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385{
3386 int ok;
3387
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003388 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003389 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3390
3391 if (PyErr_Occurred())
3392 return NULL;
3393
3394 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003395 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003396 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003398 EXPAT(GetErrorColumnNumber)(self->parser),
3399 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 );
3401 return NULL;
3402 }
3403
3404 Py_RETURN_NONE;
3405}
3406
Serhiy Storchakacb985562015-05-04 15:32:48 +03003407/*[clinic input]
3408_elementtree.XMLParser.close
3409
3410[clinic start generated code]*/
3411
3412static PyObject *
3413_elementtree_XMLParser_close_impl(XMLParserObject *self)
3414/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003415{
3416 /* end feeding data to parser */
3417
3418 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003420 if (!res)
3421 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003422
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003423 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003424 Py_DECREF(res);
3425 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003426 }
3427 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003428 Py_DECREF(res);
3429 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003430 }
3431 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003432 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003433 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434}
3435
Serhiy Storchakacb985562015-05-04 15:32:48 +03003436/*[clinic input]
3437_elementtree.XMLParser.feed
3438
3439 data: object
3440 /
3441
3442[clinic start generated code]*/
3443
3444static PyObject *
3445_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3446/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003447{
3448 /* feed data to parser */
3449
Serhiy Storchakacb985562015-05-04 15:32:48 +03003450 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003451 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003452 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3453 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003454 return NULL;
3455 if (data_len > INT_MAX) {
3456 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3457 return NULL;
3458 }
3459 /* Explicitly set UTF-8 encoding. Return code ignored. */
3460 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003461 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003462 }
3463 else {
3464 Py_buffer view;
3465 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003466 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003467 return NULL;
3468 if (view.len > INT_MAX) {
3469 PyBuffer_Release(&view);
3470 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3471 return NULL;
3472 }
3473 res = expat_parse(self, view.buf, (int)view.len, 0);
3474 PyBuffer_Release(&view);
3475 return res;
3476 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003477}
3478
Serhiy Storchakacb985562015-05-04 15:32:48 +03003479/*[clinic input]
3480_elementtree.XMLParser._parse_whole
3481
3482 file: object
3483 /
3484
3485[clinic start generated code]*/
3486
3487static PyObject *
3488_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3489/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003490{
Eli Benderskya3699232013-05-19 18:47:23 -07003491 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003492 PyObject* reader;
3493 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003494 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003495 PyObject* res;
3496
Serhiy Storchakacb985562015-05-04 15:32:48 +03003497 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 if (!reader)
3499 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003500
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003501 /* read from open file object */
3502 for (;;) {
3503
3504 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3505
3506 if (!buffer) {
3507 /* read failed (e.g. due to KeyboardInterrupt) */
3508 Py_DECREF(reader);
3509 return NULL;
3510 }
3511
Eli Benderskyf996e772012-03-16 05:53:30 +02003512 if (PyUnicode_CheckExact(buffer)) {
3513 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003514 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003515 Py_DECREF(buffer);
3516 break;
3517 }
3518 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003519 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003520 if (!temp) {
3521 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003522 Py_DECREF(reader);
3523 return NULL;
3524 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003525 buffer = temp;
3526 }
3527 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003528 Py_DECREF(buffer);
3529 break;
3530 }
3531
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003532 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3533 Py_DECREF(buffer);
3534 Py_DECREF(reader);
3535 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3536 return NULL;
3537 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003538 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003539 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003540 );
3541
3542 Py_DECREF(buffer);
3543
3544 if (!res) {
3545 Py_DECREF(reader);
3546 return NULL;
3547 }
3548 Py_DECREF(res);
3549
3550 }
3551
3552 Py_DECREF(reader);
3553
3554 res = expat_parse(self, "", 0, 1);
3555
3556 if (res && TreeBuilder_CheckExact(self->target)) {
3557 Py_DECREF(res);
3558 return treebuilder_done((TreeBuilderObject*) self->target);
3559 }
3560
3561 return res;
3562}
3563
Serhiy Storchakacb985562015-05-04 15:32:48 +03003564/*[clinic input]
3565_elementtree.XMLParser.doctype
3566
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003567 name: object
3568 pubid: object
3569 system: object
3570 /
3571
Serhiy Storchakacb985562015-05-04 15:32:48 +03003572[clinic start generated code]*/
3573
3574static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003575_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3576 PyObject *pubid, PyObject *system)
3577/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003578{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003579 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3580 "This method of XMLParser is deprecated. Define"
3581 " doctype() method on the TreeBuilder target.",
3582 1) < 0) {
3583 return NULL;
3584 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003585 Py_RETURN_NONE;
3586}
3587
Serhiy Storchakacb985562015-05-04 15:32:48 +03003588/*[clinic input]
3589_elementtree.XMLParser._setevents
3590
3591 events_queue: object(subclass_of='&PyList_Type')
3592 events_to_report: object = None
3593 /
3594
3595[clinic start generated code]*/
3596
3597static PyObject *
3598_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3599 PyObject *events_queue,
3600 PyObject *events_to_report)
3601/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003602{
3603 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003604 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003605 TreeBuilderObject *target;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003606 PyObject *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003607
3608 if (!TreeBuilder_CheckExact(self->target)) {
3609 PyErr_SetString(
3610 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003611 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003612 "targets"
3613 );
3614 return NULL;
3615 }
3616
3617 target = (TreeBuilderObject*) self->target;
3618
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003619 Py_INCREF(events_queue);
Serhiy Storchaka48842712016-04-06 09:45:48 +03003620 Py_XSETREF(target->events, events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621
3622 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003623 Py_CLEAR(target->start_event_obj);
3624 Py_CLEAR(target->end_event_obj);
3625 Py_CLEAR(target->start_ns_event_obj);
3626 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003627
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003628 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003630 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003631 Py_RETURN_NONE;
3632 }
3633
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003634 if (!(events_seq = PySequence_Fast(events_to_report,
3635 "events must be a sequence"))) {
3636 return NULL;
3637 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003639 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003640 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3641 char *event_name = NULL;
3642 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003643 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003644 } else if (PyBytes_Check(event_name_obj)) {
3645 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003646 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003647 if (event_name == NULL) {
3648 Py_DECREF(events_seq);
3649 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3650 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003651 }
3652
3653 Py_INCREF(event_name_obj);
3654 if (strcmp(event_name, "start") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003655 Py_XSETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003656 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003657 Py_XSETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003658 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003659 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003660 EXPAT(SetNamespaceDeclHandler)(
3661 self->parser,
3662 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3663 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3664 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003665 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchaka48842712016-04-06 09:45:48 +03003666 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003667 EXPAT(SetNamespaceDeclHandler)(
3668 self->parser,
3669 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3670 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3671 );
3672 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003673 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003674 Py_DECREF(events_seq);
3675 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003676 return NULL;
3677 }
3678 }
3679
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003680 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003681 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003682}
3683
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003684static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003685xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003687 if (PyUnicode_Check(nameobj)) {
3688 PyObject* res;
3689 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3690 res = self->entity;
3691 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3692 res = self->target;
3693 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3694 return PyUnicode_FromFormat(
3695 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003696 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003697 }
3698 else
3699 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003700
Alexander Belopolskye239d232010-12-08 23:31:48 +00003701 Py_INCREF(res);
3702 return res;
3703 }
3704 generic:
3705 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003706}
3707
Serhiy Storchakacb985562015-05-04 15:32:48 +03003708#include "clinic/_elementtree.c.h"
3709
3710static PyMethodDef element_methods[] = {
3711
3712 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3713
3714 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3715 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3716
3717 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3718 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3719 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3720
3721 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3722 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3723 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3724 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3725
3726 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3727 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3728 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3729
3730 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3731 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3732
3733 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3734 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3735
3736 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3737
3738 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3739 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3740 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3741 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3742 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3743
3744 {NULL, NULL}
3745};
3746
3747static PyMappingMethods element_as_mapping = {
3748 (lenfunc) element_length,
3749 (binaryfunc) element_subscr,
3750 (objobjargproc) element_ass_subscr,
3751};
3752
3753static PyTypeObject Element_Type = {
3754 PyVarObject_HEAD_INIT(NULL, 0)
3755 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3756 /* methods */
3757 (destructor)element_dealloc, /* tp_dealloc */
3758 0, /* tp_print */
3759 0, /* tp_getattr */
3760 0, /* tp_setattr */
3761 0, /* tp_reserved */
3762 (reprfunc)element_repr, /* tp_repr */
3763 0, /* tp_as_number */
3764 &element_as_sequence, /* tp_as_sequence */
3765 &element_as_mapping, /* tp_as_mapping */
3766 0, /* tp_hash */
3767 0, /* tp_call */
3768 0, /* tp_str */
3769 (getattrofunc)element_getattro, /* tp_getattro */
3770 (setattrofunc)element_setattro, /* tp_setattro */
3771 0, /* tp_as_buffer */
3772 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3773 /* tp_flags */
3774 0, /* tp_doc */
3775 (traverseproc)element_gc_traverse, /* tp_traverse */
3776 (inquiry)element_gc_clear, /* tp_clear */
3777 0, /* tp_richcompare */
3778 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3779 0, /* tp_iter */
3780 0, /* tp_iternext */
3781 element_methods, /* tp_methods */
3782 0, /* tp_members */
3783 0, /* tp_getset */
3784 0, /* tp_base */
3785 0, /* tp_dict */
3786 0, /* tp_descr_get */
3787 0, /* tp_descr_set */
3788 0, /* tp_dictoffset */
3789 (initproc)element_init, /* tp_init */
3790 PyType_GenericAlloc, /* tp_alloc */
3791 element_new, /* tp_new */
3792 0, /* tp_free */
3793};
3794
3795static PyMethodDef treebuilder_methods[] = {
3796 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3797 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3798 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3799 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3800 {NULL, NULL}
3801};
3802
3803static PyTypeObject TreeBuilder_Type = {
3804 PyVarObject_HEAD_INIT(NULL, 0)
3805 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3806 /* methods */
3807 (destructor)treebuilder_dealloc, /* tp_dealloc */
3808 0, /* tp_print */
3809 0, /* tp_getattr */
3810 0, /* tp_setattr */
3811 0, /* tp_reserved */
3812 0, /* tp_repr */
3813 0, /* tp_as_number */
3814 0, /* tp_as_sequence */
3815 0, /* tp_as_mapping */
3816 0, /* tp_hash */
3817 0, /* tp_call */
3818 0, /* tp_str */
3819 0, /* tp_getattro */
3820 0, /* tp_setattro */
3821 0, /* tp_as_buffer */
3822 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3823 /* tp_flags */
3824 0, /* tp_doc */
3825 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3826 (inquiry)treebuilder_gc_clear, /* tp_clear */
3827 0, /* tp_richcompare */
3828 0, /* tp_weaklistoffset */
3829 0, /* tp_iter */
3830 0, /* tp_iternext */
3831 treebuilder_methods, /* tp_methods */
3832 0, /* tp_members */
3833 0, /* tp_getset */
3834 0, /* tp_base */
3835 0, /* tp_dict */
3836 0, /* tp_descr_get */
3837 0, /* tp_descr_set */
3838 0, /* tp_dictoffset */
3839 _elementtree_TreeBuilder___init__, /* tp_init */
3840 PyType_GenericAlloc, /* tp_alloc */
3841 treebuilder_new, /* tp_new */
3842 0, /* tp_free */
3843};
3844
3845static PyMethodDef xmlparser_methods[] = {
3846 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3847 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3848 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3849 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3850 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3851 {NULL, NULL}
3852};
3853
Neal Norwitz227b5332006-03-22 09:28:35 +00003854static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003855 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003856 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003857 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003858 (destructor)xmlparser_dealloc, /* tp_dealloc */
3859 0, /* tp_print */
3860 0, /* tp_getattr */
3861 0, /* tp_setattr */
3862 0, /* tp_reserved */
3863 0, /* tp_repr */
3864 0, /* tp_as_number */
3865 0, /* tp_as_sequence */
3866 0, /* tp_as_mapping */
3867 0, /* tp_hash */
3868 0, /* tp_call */
3869 0, /* tp_str */
3870 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3871 0, /* tp_setattro */
3872 0, /* tp_as_buffer */
3873 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3874 /* tp_flags */
3875 0, /* tp_doc */
3876 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3877 (inquiry)xmlparser_gc_clear, /* tp_clear */
3878 0, /* tp_richcompare */
3879 0, /* tp_weaklistoffset */
3880 0, /* tp_iter */
3881 0, /* tp_iternext */
3882 xmlparser_methods, /* tp_methods */
3883 0, /* tp_members */
3884 0, /* tp_getset */
3885 0, /* tp_base */
3886 0, /* tp_dict */
3887 0, /* tp_descr_get */
3888 0, /* tp_descr_set */
3889 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003890 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003891 PyType_GenericAlloc, /* tp_alloc */
3892 xmlparser_new, /* tp_new */
3893 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003894};
3895
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003896/* ==================================================================== */
3897/* python module interface */
3898
3899static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003900 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003901 {NULL, NULL}
3902};
3903
Martin v. Löwis1a214512008-06-11 05:26:20 +00003904
Eli Bendersky532d03e2013-08-10 08:00:39 -07003905static struct PyModuleDef elementtreemodule = {
3906 PyModuleDef_HEAD_INIT,
3907 "_elementtree",
3908 NULL,
3909 sizeof(elementtreestate),
3910 _functions,
3911 NULL,
3912 elementtree_traverse,
3913 elementtree_clear,
3914 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003915};
3916
Neal Norwitzf6657e62006-12-28 04:47:50 +00003917PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003918PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003919{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003920 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003921 elementtreestate *st;
3922
3923 m = PyState_FindModule(&elementtreemodule);
3924 if (m) {
3925 Py_INCREF(m);
3926 return m;
3927 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003928
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003929 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003930 if (PyType_Ready(&ElementIter_Type) < 0)
3931 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003932 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003933 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003934 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003935 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003936 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003937 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003938
Eli Bendersky532d03e2013-08-10 08:00:39 -07003939 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003940 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003941 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003942 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003943
Eli Bendersky828efde2012-04-05 05:40:58 +03003944 if (!(temp = PyImport_ImportModule("copy")))
3945 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003946 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003947 Py_XDECREF(temp);
3948
Eli Bendersky532d03e2013-08-10 08:00:39 -07003949 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003950 return NULL;
3951
Eli Bendersky20d41742012-06-01 09:48:37 +03003952 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003953 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3954 if (expat_capi) {
3955 /* check that it's usable */
3956 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003957 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003958 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3959 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003960 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003961 PyErr_SetString(PyExc_ImportError,
3962 "pyexpat version is incompatible");
3963 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003964 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003965 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003966 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003967 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003968
Eli Bendersky532d03e2013-08-10 08:00:39 -07003969 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003970 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003971 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003972 Py_INCREF(st->parseerror_obj);
3973 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003974
Eli Bendersky092af1f2012-03-04 07:14:03 +02003975 Py_INCREF((PyObject *)&Element_Type);
3976 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3977
Eli Bendersky58d548d2012-05-29 15:45:16 +03003978 Py_INCREF((PyObject *)&TreeBuilder_Type);
3979 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3980
Eli Bendersky52467b12012-06-01 07:13:08 +03003981 Py_INCREF((PyObject *)&XMLParser_Type);
3982 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003983
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003984 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003985}