blob: ebdb95b05ec9c9b11211636cb76663bc88cd3bdb [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Size of the _children array embedded in ElementObjectExtra.  An element
   can hold this many children without extra memory allocations. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
27 have no more than the given number of children. Set this to zero
28 to minimize the size of the element structure itself (this only
29 helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  Change the condition below to 1 to keep a
   running byte count in 'memory' and print it on every ALLOC/RELEASE.  In
   the default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-local helper returning
   'type'.  Under MSVC the helper is additionally marked __inline and
   __fastcall; with any other compiler it is a plain static function. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored address carries a 'join' flag.  Every use of text or
   tail as an object pointer must therefore be wrapped in JOIN_OBJ; see the
   comments in the ElementObject definition for more info.

   JOIN_OBJ(p)        mask off the flag bit and yield the PyObject pointer
   JOIN_GET(p)        yield the flag bit (0 or 1)
   JOIN_SET(p, flag)  yield p's object pointer with 'flag' or'ed in */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
/* Types defined by this extension.  They are declared up front so that the
   code below can refer to the type objects (e.g. &Element_Type). */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
81
82
/* Per-module state; PEP 3121.
 *
 * All three members are object references that elementtree_traverse()
 * visits and elementtree_clear() drops.
 */
typedef struct {
    PyObject *parseerror_obj;
    PyObject *deepcopy_obj;     /* callable invoked by the deepcopy() helper */
    PyObject *elementpath_obj;
} elementtreestate;
89
/* Declared here so that ET_STATE_GLOBAL below can take its address. */
static struct PyModuleDef elementtreemodule;

/* Given a module object (assumed to be _elementtree), get its per-module
 * state.
 */
#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))

/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.  The result of PyState_FindModule() is passed straight
 * to PyModule_GetState() without a NULL check.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
/* True if op's type is exactly Element_Type; the comparison is on the type
   pointer itself, so instances of subclasses do not match. */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
/* Drop every reference held by 'self': tag, text and tail are reset to
 * NULL and the extra block is released.  Always returns 0.
 */
static int
element_gc_clear(ElementObject *self)
{
    Py_CLEAR(self->tag);
    /* text and tail are join-tagged, so they need the flag-aware variant
       of Py_CLEAR */
    _clear_joined_ptr(&self->text);
    _clear_joined_ptr(&self->tail);

    /* After dropping all references from extra, it's no longer valid anyway,
     * so fully deallocate it.
     */
    dealloc_extra(self);
    return 0;
}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655
656 if (self->weakreflist != NULL)
657 PyObject_ClearWeakRefs((PyObject *) self);
658
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 /* element_gc_clear clears all references and deallocates extra
660 */
661 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
663 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200664 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665}
666
667/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669/*[clinic input]
670_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
Serhiy Storchakacb985562015-05-04 15:32:48 +0300672 subelement: object(subclass_of='&Element_Type')
673 /
674
675[clinic start generated code]*/
676
677static PyObject *
678_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
679/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
680{
681 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 return NULL;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element_clear_impl(ElementObject *self)
694/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
695{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300696 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_INCREF(Py_None);
699 Py_DECREF(JOIN_OBJ(self->text));
700 self->text = Py_None;
701
702 Py_INCREF(Py_None);
703 Py_DECREF(JOIN_OBJ(self->tail));
704 self->tail = Py_None;
705
706 Py_RETURN_NONE;
707}
708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709/*[clinic input]
710_elementtree.Element.__copy__
711
712[clinic start generated code]*/
713
714static PyObject *
715_elementtree_Element___copy___impl(ElementObject *self)
716/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200718 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 ElementObject* element;
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800722 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (!element)
724 return NULL;
725
726 Py_DECREF(JOIN_OBJ(element->text));
727 element->text = self->text;
728 Py_INCREF(JOIN_OBJ(element->text));
729
730 Py_DECREF(JOIN_OBJ(element->tail));
731 element->tail = self->tail;
732 Py_INCREF(JOIN_OBJ(element->tail));
733
734 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000735 if (element_resize(element, self->extra->length) < 0) {
736 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000738 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
740 for (i = 0; i < self->extra->length; i++) {
741 Py_INCREF(self->extra->children[i]);
742 element->extra->children[i] = self->extra->children[i];
743 }
744
745 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 }
747
748 return (PyObject*) element;
749}
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
754 memo: object
755 /
756
757[clinic start generated code]*/
758
759static PyObject *
760_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 Py_DECREF(element->text);
798 element->text = JOIN_SET(text, JOIN_GET(self->text));
799
800 tail = deepcopy(JOIN_OBJ(self->tail), memo);
801 if (!tail)
802 goto error;
803 Py_DECREF(element->tail);
804 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
805
806 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 if (element_resize(element, self->extra->length) < 0)
808 goto error;
809
810 for (i = 0; i < self->extra->length; i++) {
811 PyObject* child = deepcopy(self->extra->children[i], memo);
812 if (!child) {
813 element->extra->length = i;
814 goto error;
815 }
816 element->extra->children[i] = child;
817 }
818
819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200823 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchakacb985562015-05-04 15:32:48 +0300841/*[clinic input]
842_elementtree.Element.__sizeof__ -> Py_ssize_t
843
844[clinic start generated code]*/
845
846static Py_ssize_t
847_elementtree_Element___sizeof___impl(ElementObject *self)
848/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200849{
Martin v. Löwisbce16662012-06-17 10:41:22 +0200850 Py_ssize_t result = sizeof(ElementObject);
851 if (self->extra) {
852 result += sizeof(ElementObjectExtra);
853 if (self->extra->children != self->extra->_children)
854 result += sizeof(PyObject*) * self->extra->allocated;
855 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300856 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200857}
858
/* Dict keys for getstate/setstate: the key names used in the state dict
 * built by __getstate__ and looked up again when a state dict is unpacked.
 */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
865
/* __getstate__ returns a fabricated instance dict as in the pure-Python
 * Element implementation, for interoperability/interchangeability.  This
 * makes the pure-Python implementation details an API, but (a) there aren't
 * any unnecessary structures there; and (b) it buys compatibility with 3.2
 * pickles.  See issue #16076.
 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300872/*[clinic input]
873_elementtree.Element.__getstate__
874
875[clinic start generated code]*/
876
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300878_elementtree_Element___getstate___impl(ElementObject *self)
879/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200881 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 PyObject *instancedict = NULL, *children;
883
884 /* Build a list of children. */
885 children = PyList_New(self->extra ? self->extra->length : 0);
886 if (!children)
887 return NULL;
888 for (i = 0; i < PyList_GET_SIZE(children); i++) {
889 PyObject *child = self->extra->children[i];
890 Py_INCREF(child);
891 PyList_SET_ITEM(children, i, child);
892 }
893
894 /* Construct the state object. */
895 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
896 if (noattrib)
897 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
898 PICKLED_TAG, self->tag,
899 PICKLED_CHILDREN, children,
900 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 PICKLED_TEXT, JOIN_OBJ(self->text),
902 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903 else
904 instancedict = Py_BuildValue("{sOsOsOsOsO}",
905 PICKLED_TAG, self->tag,
906 PICKLED_CHILDREN, children,
907 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 PICKLED_TEXT, JOIN_OBJ(self->text),
909 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800910 if (instancedict) {
911 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800913 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 else {
915 for (i = 0; i < PyList_GET_SIZE(children); i++)
916 Py_DECREF(PyList_GET_ITEM(children, i));
917 Py_DECREF(children);
918
919 return NULL;
920 }
921}
922
923static PyObject *
924element_setstate_from_attributes(ElementObject *self,
925 PyObject *tag,
926 PyObject *attrib,
927 PyObject *text,
928 PyObject *tail,
929 PyObject *children)
930{
931 Py_ssize_t i, nchildren;
932
933 if (!tag) {
934 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
935 return NULL;
936 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937
938 Py_CLEAR(self->tag);
939 self->tag = tag;
940 Py_INCREF(self->tag);
941
Eli Benderskydd3661e2013-09-13 06:24:25 -0700942 _clear_joined_ptr(&self->text);
943 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
944 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945
Eli Benderskydd3661e2013-09-13 06:24:25 -0700946 _clear_joined_ptr(&self->tail);
947 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
948 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949
950 /* Handle ATTRIB and CHILDREN. */
951 if (!children && !attrib)
952 Py_RETURN_NONE;
953
954 /* Compute 'nchildren'. */
955 if (children) {
956 if (!PyList_Check(children)) {
957 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
958 return NULL;
959 }
960 nchildren = PyList_Size(children);
961 }
962 else {
963 nchildren = 0;
964 }
965
966 /* Allocate 'extra'. */
967 if (element_resize(self, nchildren)) {
968 return NULL;
969 }
970 assert(self->extra && self->extra->allocated >= nchildren);
971
972 /* Copy children */
973 for (i = 0; i < nchildren; i++) {
974 self->extra->children[i] = PyList_GET_ITEM(children, i);
975 Py_INCREF(self->extra->children[i]);
976 }
977
978 self->extra->length = nchildren;
979 self->extra->allocated = nchildren;
980
981 /* Stash attrib. */
982 if (attrib) {
983 Py_CLEAR(self->extra->attrib);
984 self->extra->attrib = attrib;
985 Py_INCREF(attrib);
986 }
987
988 Py_RETURN_NONE;
989}
990
/* __setstate__ for Element instance from the Python implementation.
 * 'state' should be the instance dict.
 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300994
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995static PyObject *
996element_setstate_from_Python(ElementObject *self, PyObject *state)
997{
998 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
999 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1000 PyObject *args;
1001 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001002 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004 tag = attrib = text = tail = children = NULL;
1005 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001008
1009 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1010 &attrib, &text, &tail, &children))
1011 retval = element_setstate_from_attributes(self, tag, attrib, text,
1012 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001014 retval = NULL;
1015
1016 Py_DECREF(args);
1017 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001018}
1019
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020/*[clinic input]
1021_elementtree.Element.__setstate__
1022
1023 state: object
1024 /
1025
1026[clinic start generated code]*/
1027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001029_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1030/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031{
1032 if (!PyDict_CheckExact(state)) {
1033 PyErr_Format(PyExc_TypeError,
1034 "Don't know how to unpickle \"%.200R\" as an Element",
1035 state);
1036 return NULL;
1037 }
1038 else
1039 return element_setstate_from_Python(self, state);
1040}
1041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001042LOCAL(int)
1043checkpath(PyObject* tag)
1044{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001045 Py_ssize_t i;
1046 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047
1048 /* check if a tag contains an xpath character */
1049
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050#define PATHCHAR(ch) \
1051 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1055 void *data = PyUnicode_DATA(tag);
1056 unsigned int kind = PyUnicode_KIND(tag);
1057 for (i = 0; i < len; i++) {
1058 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1059 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 return 1;
1065 }
1066 return 0;
1067 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001068 if (PyBytes_Check(tag)) {
1069 char *p = PyBytes_AS_STRING(tag);
1070 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071 if (p[i] == '{')
1072 check = 0;
1073 else if (p[i] == '}')
1074 check = 1;
1075 else if (check && PATHCHAR(p[i]))
1076 return 1;
1077 }
1078 return 0;
1079 }
1080
1081 return 1; /* unknown type; might be path expression */
1082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.extend
1086
1087 elements: object
1088 /
1089
1090[clinic start generated code]*/
1091
1092static PyObject *
1093_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1094/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095{
1096 PyObject* seq;
1097 Py_ssize_t i, seqlen = 0;
1098
Serhiy Storchakacb985562015-05-04 15:32:48 +03001099 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001100 if (!seq) {
1101 PyErr_Format(
1102 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001104 );
1105 return NULL;
1106 }
1107
1108 seqlen = PySequence_Size(seq);
1109 for (i = 0; i < seqlen; i++) {
1110 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001111 if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
1112 Py_DECREF(seq);
1113 PyErr_Format(
1114 PyExc_TypeError,
1115 "expected an Element, not \"%.200s\"",
1116 Py_TYPE(element)->tp_name);
1117 return NULL;
1118 }
1119
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001120 if (element_add_subelement(self, element) < 0) {
1121 Py_DECREF(seq);
1122 return NULL;
1123 }
1124 }
1125
1126 Py_DECREF(seq);
1127
1128 Py_RETURN_NONE;
1129}
1130
Serhiy Storchakacb985562015-05-04 15:32:48 +03001131/*[clinic input]
1132_elementtree.Element.find
1133
1134 path: object
1135 namespaces: object = None
1136
1137[clinic start generated code]*/
1138
1139static PyObject *
1140_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1141 PyObject *namespaces)
1142/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001143{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001144 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001145 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001146
Serhiy Storchakacb985562015-05-04 15:32:48 +03001147 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001148 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001149 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001150 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001151 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001153
1154 if (!self->extra)
1155 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001156
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001157 for (i = 0; i < self->extra->length; i++) {
1158 PyObject* item = self->extra->children[i];
1159 if (Element_CheckExact(item) &&
Serhiy Storchakacb985562015-05-04 15:32:48 +03001160 PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001161 Py_INCREF(item);
1162 return item;
1163 }
1164 }
1165
1166 Py_RETURN_NONE;
1167}
1168
Serhiy Storchakacb985562015-05-04 15:32:48 +03001169/*[clinic input]
1170_elementtree.Element.findtext
1171
1172 path: object
1173 default: object = None
1174 namespaces: object = None
1175
1176[clinic start generated code]*/
1177
1178static PyObject *
1179_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1180 PyObject *default_value,
1181 PyObject *namespaces)
1182/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001183{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001184 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001185 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001186 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001187
Serhiy Storchakacb985562015-05-04 15:32:48 +03001188 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001189 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001190 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191 );
1192
1193 if (!self->extra) {
1194 Py_INCREF(default_value);
1195 return default_value;
1196 }
1197
1198 for (i = 0; i < self->extra->length; i++) {
1199 ElementObject* item = (ElementObject*) self->extra->children[i];
Eli Bendersky163d7f02013-11-24 06:55:04 -08001200 if (Element_CheckExact(item) &&
Serhiy Storchakacb985562015-05-04 15:32:48 +03001201 (PyObject_RichCompareBool(item->tag, path, Py_EQ) == 1)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001202 PyObject* text = element_get_text(item);
1203 if (text == Py_None)
Eli Bendersky25771b32013-01-13 05:26:07 -08001204 return PyUnicode_New(0, 0);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001205 Py_XINCREF(text);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001206 return text;
1207 }
1208 }
1209
1210 Py_INCREF(default_value);
1211 return default_value;
1212}
1213
Serhiy Storchakacb985562015-05-04 15:32:48 +03001214/*[clinic input]
1215_elementtree.Element.findall
1216
1217 path: object
1218 namespaces: object = None
1219
1220[clinic start generated code]*/
1221
1222static PyObject *
1223_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1224 PyObject *namespaces)
1225/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001227 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001228 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001229 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001230 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001231
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001232 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001233 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001234 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001235 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001236 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001238
1239 out = PyList_New(0);
1240 if (!out)
1241 return NULL;
1242
1243 if (!self->extra)
1244 return out;
1245
1246 for (i = 0; i < self->extra->length; i++) {
1247 PyObject* item = self->extra->children[i];
1248 if (Element_CheckExact(item) &&
Mark Dickinson211c6252009-02-01 10:28:51 +00001249 PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001250 if (PyList_Append(out, item) < 0) {
1251 Py_DECREF(out);
1252 return NULL;
1253 }
1254 }
1255 }
1256
1257 return out;
1258}
1259
Serhiy Storchakacb985562015-05-04 15:32:48 +03001260/*[clinic input]
1261_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001262
Serhiy Storchakacb985562015-05-04 15:32:48 +03001263 path: object
1264 namespaces: object = None
1265
1266[clinic start generated code]*/
1267
1268static PyObject *
1269_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1270 PyObject *namespaces)
1271/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1272{
1273 PyObject* tag = path;
1274 _Py_IDENTIFIER(iterfind);
1275 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001276
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001277 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001278 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001279}
1280
Serhiy Storchakacb985562015-05-04 15:32:48 +03001281/*[clinic input]
1282_elementtree.Element.get
1283
1284 key: object
1285 default: object = None
1286
1287[clinic start generated code]*/
1288
1289static PyObject *
1290_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1291 PyObject *default_value)
1292/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001293{
1294 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001295
1296 if (!self->extra || self->extra->attrib == Py_None)
1297 value = default_value;
1298 else {
1299 value = PyDict_GetItem(self->extra->attrib, key);
1300 if (!value)
1301 value = default_value;
1302 }
1303
1304 Py_INCREF(value);
1305 return value;
1306}
1307
Serhiy Storchakacb985562015-05-04 15:32:48 +03001308/*[clinic input]
1309_elementtree.Element.getchildren
1310
1311[clinic start generated code]*/
1312
1313static PyObject *
1314_elementtree_Element_getchildren_impl(ElementObject *self)
1315/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001316{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001317 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001318 PyObject* list;
1319
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001320 /* FIXME: report as deprecated? */
1321
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001322 if (!self->extra)
1323 return PyList_New(0);
1324
1325 list = PyList_New(self->extra->length);
1326 if (!list)
1327 return NULL;
1328
1329 for (i = 0; i < self->extra->length; i++) {
1330 PyObject* item = self->extra->children[i];
1331 Py_INCREF(item);
1332 PyList_SET_ITEM(list, i, item);
1333 }
1334
1335 return list;
1336}
1337
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001338
/* Forward declaration: called by Element.iter() and Element.itertext()
   below; the definition lives elsewhere in this translation unit. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1341
1342
Serhiy Storchakacb985562015-05-04 15:32:48 +03001343/*[clinic input]
1344_elementtree.Element.iter
1345
1346 tag: object = None
1347
1348[clinic start generated code]*/
1349
Eli Bendersky64d11e62012-06-15 07:42:50 +03001350static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001351_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1352/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001353{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001354 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001355}
1356
1357
Serhiy Storchakacb985562015-05-04 15:32:48 +03001358/*[clinic input]
1359_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001360
Serhiy Storchakacb985562015-05-04 15:32:48 +03001361[clinic start generated code]*/
1362
1363static PyObject *
1364_elementtree_Element_itertext_impl(ElementObject *self)
1365/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1366{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001367 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001368}
1369
Eli Bendersky64d11e62012-06-15 07:42:50 +03001370
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001371static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001372element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001373{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001374 ElementObject* self = (ElementObject*) self_;
1375
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001376 if (!self->extra || index < 0 || index >= self->extra->length) {
1377 PyErr_SetString(
1378 PyExc_IndexError,
1379 "child index out of range"
1380 );
1381 return NULL;
1382 }
1383
1384 Py_INCREF(self->extra->children[index]);
1385 return self->extra->children[index];
1386}
1387
Serhiy Storchakacb985562015-05-04 15:32:48 +03001388/*[clinic input]
1389_elementtree.Element.insert
1390
1391 index: Py_ssize_t
1392 subelement: object(subclass_of='&Element_Type')
1393 /
1394
1395[clinic start generated code]*/
1396
1397static PyObject *
1398_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1399 PyObject *subelement)
1400/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001402 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001403
Victor Stinner5f0af232013-07-11 23:01:36 +02001404 if (!self->extra) {
1405 if (create_extra(self, NULL) < 0)
1406 return NULL;
1407 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001408
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001409 if (index < 0) {
1410 index += self->extra->length;
1411 if (index < 0)
1412 index = 0;
1413 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001414 if (index > self->extra->length)
1415 index = self->extra->length;
1416
1417 if (element_resize(self, 1) < 0)
1418 return NULL;
1419
1420 for (i = self->extra->length; i > index; i--)
1421 self->extra->children[i] = self->extra->children[i-1];
1422
Serhiy Storchakacb985562015-05-04 15:32:48 +03001423 Py_INCREF(subelement);
1424 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001425
1426 self->extra->length++;
1427
1428 Py_RETURN_NONE;
1429}
1430
Serhiy Storchakacb985562015-05-04 15:32:48 +03001431/*[clinic input]
1432_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001433
Serhiy Storchakacb985562015-05-04 15:32:48 +03001434[clinic start generated code]*/
1435
1436static PyObject *
1437_elementtree_Element_items_impl(ElementObject *self)
1438/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1439{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001440 if (!self->extra || self->extra->attrib == Py_None)
1441 return PyList_New(0);
1442
1443 return PyDict_Items(self->extra->attrib);
1444}
1445
Serhiy Storchakacb985562015-05-04 15:32:48 +03001446/*[clinic input]
1447_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001448
Serhiy Storchakacb985562015-05-04 15:32:48 +03001449[clinic start generated code]*/
1450
1451static PyObject *
1452_elementtree_Element_keys_impl(ElementObject *self)
1453/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1454{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455 if (!self->extra || self->extra->attrib == Py_None)
1456 return PyList_New(0);
1457
1458 return PyDict_Keys(self->extra->attrib);
1459}
1460
Martin v. Löwis18e16552006-02-15 17:27:45 +00001461static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462element_length(ElementObject* self)
1463{
1464 if (!self->extra)
1465 return 0;
1466
1467 return self->extra->length;
1468}
1469
Serhiy Storchakacb985562015-05-04 15:32:48 +03001470/*[clinic input]
1471_elementtree.Element.makeelement
1472
1473 tag: object
1474 attrib: object
1475 /
1476
1477[clinic start generated code]*/
1478
1479static PyObject *
1480_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1481 PyObject *attrib)
1482/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001483{
1484 PyObject* elem;
1485
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486 attrib = PyDict_Copy(attrib);
1487 if (!attrib)
1488 return NULL;
1489
Eli Bendersky092af1f2012-03-04 07:14:03 +02001490 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001491
1492 Py_DECREF(attrib);
1493
1494 return elem;
1495}
1496
Serhiy Storchakacb985562015-05-04 15:32:48 +03001497/*[clinic input]
1498_elementtree.Element.remove
1499
1500 subelement: object(subclass_of='&Element_Type')
1501 /
1502
1503[clinic start generated code]*/
1504
1505static PyObject *
1506_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1507/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001508{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001509 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001511 if (!self->extra) {
1512 /* element has no children, so raise exception */
1513 PyErr_SetString(
1514 PyExc_ValueError,
1515 "list.remove(x): x not in list"
1516 );
1517 return NULL;
1518 }
1519
1520 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001521 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522 break;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001523 if (PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ) == 1)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524 break;
1525 }
1526
1527 if (i == self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001528 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529 PyErr_SetString(
1530 PyExc_ValueError,
1531 "list.remove(x): x not in list"
1532 );
1533 return NULL;
1534 }
1535
1536 Py_DECREF(self->extra->children[i]);
1537
1538 self->extra->length--;
1539
1540 for (; i < self->extra->length; i++)
1541 self->extra->children[i] = self->extra->children[i+1];
1542
1543 Py_RETURN_NONE;
1544}
1545
1546static PyObject*
1547element_repr(ElementObject* self)
1548{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001549 if (self->tag)
1550 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1551 else
1552 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001553}
1554
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555/*[clinic input]
1556_elementtree.Element.set
1557
1558 key: object
1559 value: object
1560 /
1561
1562[clinic start generated code]*/
1563
1564static PyObject *
1565_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1566 PyObject *value)
1567/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001568{
1569 PyObject* attrib;
1570
Victor Stinner5f0af232013-07-11 23:01:36 +02001571 if (!self->extra) {
1572 if (create_extra(self, NULL) < 0)
1573 return NULL;
1574 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575
1576 attrib = element_get_attrib(self);
1577 if (!attrib)
1578 return NULL;
1579
1580 if (PyDict_SetItem(attrib, key, value) < 0)
1581 return NULL;
1582
1583 Py_RETURN_NONE;
1584}
1585
1586static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001587element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001588{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001589 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001590 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591 PyObject* old;
1592
1593 if (!self->extra || index < 0 || index >= self->extra->length) {
1594 PyErr_SetString(
1595 PyExc_IndexError,
1596 "child assignment index out of range");
1597 return -1;
1598 }
1599
1600 old = self->extra->children[index];
1601
1602 if (item) {
1603 Py_INCREF(item);
1604 self->extra->children[index] = item;
1605 } else {
1606 self->extra->length--;
1607 for (i = index; i < self->extra->length; i++)
1608 self->extra->children[i] = self->extra->children[i+1];
1609 }
1610
1611 Py_DECREF(old);
1612
1613 return 0;
1614}
1615
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001616static PyObject*
1617element_subscr(PyObject* self_, PyObject* item)
1618{
1619 ElementObject* self = (ElementObject*) self_;
1620
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001621 if (PyIndex_Check(item)) {
1622 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001623
1624 if (i == -1 && PyErr_Occurred()) {
1625 return NULL;
1626 }
1627 if (i < 0 && self->extra)
1628 i += self->extra->length;
1629 return element_getitem(self_, i);
1630 }
1631 else if (PySlice_Check(item)) {
1632 Py_ssize_t start, stop, step, slicelen, cur, i;
1633 PyObject* list;
1634
1635 if (!self->extra)
1636 return PyList_New(0);
1637
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001638 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001639 self->extra->length,
1640 &start, &stop, &step, &slicelen) < 0) {
1641 return NULL;
1642 }
1643
1644 if (slicelen <= 0)
1645 return PyList_New(0);
1646 else {
1647 list = PyList_New(slicelen);
1648 if (!list)
1649 return NULL;
1650
1651 for (cur = start, i = 0; i < slicelen;
1652 cur += step, i++) {
1653 PyObject* item = self->extra->children[cur];
1654 Py_INCREF(item);
1655 PyList_SET_ITEM(list, i, item);
1656 }
1657
1658 return list;
1659 }
1660 }
1661 else {
1662 PyErr_SetString(PyExc_TypeError,
1663 "element indices must be integers");
1664 return NULL;
1665 }
1666}
1667
1668static int
1669element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1670{
1671 ElementObject* self = (ElementObject*) self_;
1672
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001673 if (PyIndex_Check(item)) {
1674 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001675
1676 if (i == -1 && PyErr_Occurred()) {
1677 return -1;
1678 }
1679 if (i < 0 && self->extra)
1680 i += self->extra->length;
1681 return element_setitem(self_, i, value);
1682 }
1683 else if (PySlice_Check(item)) {
1684 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1685
1686 PyObject* recycle = NULL;
1687 PyObject* seq = NULL;
1688
Victor Stinner5f0af232013-07-11 23:01:36 +02001689 if (!self->extra) {
1690 if (create_extra(self, NULL) < 0)
1691 return -1;
1692 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001693
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001694 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001695 self->extra->length,
1696 &start, &stop, &step, &slicelen) < 0) {
1697 return -1;
1698 }
1699
Eli Bendersky865756a2012-03-09 13:38:15 +02001700 if (value == NULL) {
1701 /* Delete slice */
1702 size_t cur;
1703 Py_ssize_t i;
1704
1705 if (slicelen <= 0)
1706 return 0;
1707
1708 /* Since we're deleting, the direction of the range doesn't matter,
1709 * so for simplicity make it always ascending.
1710 */
1711 if (step < 0) {
1712 stop = start + 1;
1713 start = stop + step * (slicelen - 1) - 1;
1714 step = -step;
1715 }
1716
1717 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1718
1719 /* recycle is a list that will contain all the children
1720 * scheduled for removal.
1721 */
1722 if (!(recycle = PyList_New(slicelen))) {
1723 PyErr_NoMemory();
1724 return -1;
1725 }
1726
1727 /* This loop walks over all the children that have to be deleted,
1728 * with cur pointing at them. num_moved is the amount of children
1729 * until the next deleted child that have to be "shifted down" to
1730 * occupy the deleted's places.
1731 * Note that in the ith iteration, shifting is done i+i places down
1732 * because i children were already removed.
1733 */
1734 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1735 /* Compute how many children have to be moved, clipping at the
1736 * list end.
1737 */
1738 Py_ssize_t num_moved = step - 1;
1739 if (cur + step >= (size_t)self->extra->length) {
1740 num_moved = self->extra->length - cur - 1;
1741 }
1742
1743 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1744
1745 memmove(
1746 self->extra->children + cur - i,
1747 self->extra->children + cur + 1,
1748 num_moved * sizeof(PyObject *));
1749 }
1750
1751 /* Leftover "tail" after the last removed child */
1752 cur = start + (size_t)slicelen * step;
1753 if (cur < (size_t)self->extra->length) {
1754 memmove(
1755 self->extra->children + cur - slicelen,
1756 self->extra->children + cur,
1757 (self->extra->length - cur) * sizeof(PyObject *));
1758 }
1759
1760 self->extra->length -= slicelen;
1761
1762 /* Discard the recycle list with all the deleted sub-elements */
1763 Py_XDECREF(recycle);
1764 return 0;
1765 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001766 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001767 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001768 seq = PySequence_Fast(value, "");
1769 if (!seq) {
1770 PyErr_Format(
1771 PyExc_TypeError,
1772 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1773 );
1774 return -1;
1775 }
1776 newlen = PySequence_Size(seq);
1777 }
1778
1779 if (step != 1 && newlen != slicelen)
1780 {
1781 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001782 "attempt to assign sequence of size %zd "
1783 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001784 newlen, slicelen
1785 );
1786 return -1;
1787 }
1788
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001789 /* Resize before creating the recycle bin, to prevent refleaks. */
1790 if (newlen > slicelen) {
1791 if (element_resize(self, newlen - slicelen) < 0) {
1792 if (seq) {
1793 Py_DECREF(seq);
1794 }
1795 return -1;
1796 }
1797 }
1798
1799 if (slicelen > 0) {
1800 /* to avoid recursive calls to this method (via decref), move
1801 old items to the recycle bin here, and get rid of them when
1802 we're done modifying the element */
1803 recycle = PyList_New(slicelen);
1804 if (!recycle) {
1805 if (seq) {
1806 Py_DECREF(seq);
1807 }
1808 return -1;
1809 }
1810 for (cur = start, i = 0; i < slicelen;
1811 cur += step, i++)
1812 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1813 }
1814
1815 if (newlen < slicelen) {
1816 /* delete slice */
1817 for (i = stop; i < self->extra->length; i++)
1818 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1819 } else if (newlen > slicelen) {
1820 /* insert slice */
1821 for (i = self->extra->length-1; i >= stop; i--)
1822 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1823 }
1824
1825 /* replace the slice */
1826 for (cur = start, i = 0; i < newlen;
1827 cur += step, i++) {
1828 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1829 Py_INCREF(element);
1830 self->extra->children[cur] = element;
1831 }
1832
1833 self->extra->length += newlen - slicelen;
1834
1835 if (seq) {
1836 Py_DECREF(seq);
1837 }
1838
1839 /* discard the recycle bin, and everything in it */
1840 Py_XDECREF(recycle);
1841
1842 return 0;
1843 }
1844 else {
1845 PyErr_SetString(PyExc_TypeError,
1846 "element indices must be integers");
1847 return -1;
1848 }
1849}
1850
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001851static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001852element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001853{
1854 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001855 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001856
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001857 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001858 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001859
Alexander Belopolskye239d232010-12-08 23:31:48 +00001860 if (name == NULL)
1861 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001862
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001863 /* handle common attributes first */
1864 if (strcmp(name, "tag") == 0) {
1865 res = self->tag;
1866 Py_INCREF(res);
1867 return res;
1868 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001869 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001870 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001871 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001872 }
1873
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001874 /* methods */
1875 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1876 if (res)
1877 return res;
1878
1879 /* less common attributes */
1880 if (strcmp(name, "tail") == 0) {
1881 PyErr_Clear();
1882 res = element_get_tail(self);
1883 } else if (strcmp(name, "attrib") == 0) {
1884 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001885 if (!self->extra) {
1886 if (create_extra(self, NULL) < 0)
1887 return NULL;
1888 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001889 res = element_get_attrib(self);
1890 }
1891
1892 if (!res)
1893 return NULL;
1894
1895 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001896 return res;
1897}
1898
Eli Benderskyef9683b2013-05-18 07:52:34 -07001899static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001900element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901{
Eli Benderskyb20df952012-05-20 06:33:29 +03001902 char *name = "";
1903 if (PyUnicode_Check(nameobj))
1904 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001905 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001906 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001907
1908 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001909 Py_DECREF(self->tag);
1910 self->tag = value;
1911 Py_INCREF(self->tag);
1912 } else if (strcmp(name, "text") == 0) {
1913 Py_DECREF(JOIN_OBJ(self->text));
1914 self->text = value;
1915 Py_INCREF(self->text);
1916 } else if (strcmp(name, "tail") == 0) {
1917 Py_DECREF(JOIN_OBJ(self->tail));
1918 self->tail = value;
1919 Py_INCREF(self->tail);
1920 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001921 if (!self->extra) {
1922 if (create_extra(self, NULL) < 0)
1923 return -1;
1924 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001925 Py_DECREF(self->extra->attrib);
1926 self->extra->attrib = value;
1927 Py_INCREF(self->extra->attrib);
1928 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001929 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001930 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001931 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932 }
1933
Eli Benderskyef9683b2013-05-18 07:52:34 -07001934 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001935}
1936
1937static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001938 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001939 0, /* sq_concat */
1940 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001941 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001942 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001943 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001944 0,
1945};
1946
Eli Bendersky64d11e62012-06-15 07:42:50 +03001947/******************************* Element iterator ****************************/
1948
1949/* ElementIterObject represents the iteration state over an XML element in
1950 * pre-order traversal. To keep track of which sub-element should be returned
1951 * next, a stack of parents is maintained. This is a standard stack-based
1952 * iterative pre-order traversal of a tree.
1953 * The stack is managed using a single-linked list starting at parent_stack.
1954 * Each stack node contains the saved parent to which we should return after
1955 * the current one is exhausted, and the next child to examine in that parent.
1956 */
1957typedef struct ParentLocator_t {
1958 ElementObject *parent;
1959 Py_ssize_t child_index;
1960 struct ParentLocator_t *next;
1961} ParentLocator;
1962
1963typedef struct {
1964 PyObject_HEAD
1965 ParentLocator *parent_stack;
1966 ElementObject *root_element;
1967 PyObject *sought_tag;
1968 int root_done;
1969 int gettext;
1970} ElementIterObject;
1971
1972
1973static void
1974elementiter_dealloc(ElementIterObject *it)
1975{
1976 ParentLocator *p = it->parent_stack;
1977 while (p) {
1978 ParentLocator *temp = p;
1979 Py_XDECREF(p->parent);
1980 p = p->next;
1981 PyObject_Free(temp);
1982 }
1983
1984 Py_XDECREF(it->sought_tag);
1985 Py_XDECREF(it->root_element);
1986
1987 PyObject_GC_UnTrack(it);
1988 PyObject_GC_Del(it);
1989}
1990
1991static int
1992elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
1993{
1994 ParentLocator *p = it->parent_stack;
1995 while (p) {
1996 Py_VISIT(p->parent);
1997 p = p->next;
1998 }
1999
2000 Py_VISIT(it->root_element);
2001 Py_VISIT(it->sought_tag);
2002 return 0;
2003}
2004
2005/* Helper function for elementiter_next. Add a new parent to the parent stack.
2006 */
2007static ParentLocator *
2008parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2009{
2010 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2011 if (new_node) {
2012 new_node->parent = parent;
2013 Py_INCREF(parent);
2014 new_node->child_index = 0;
2015 new_node->next = stack;
2016 }
2017 return new_node;
2018}
2019
2020static PyObject *
2021elementiter_next(ElementIterObject *it)
2022{
2023 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002024 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002025 * A short note on gettext: this function serves both the iter() and
2026 * itertext() methods to avoid code duplication. However, there are a few
2027 * small differences in the way these iterations work. Namely:
2028 * - itertext() only yields text from nodes that have it, and continues
2029 * iterating when a node doesn't have text (so it doesn't return any
2030 * node like iter())
2031 * - itertext() also has to handle tail, after finishing with all the
2032 * children of a node.
2033 */
Eli Bendersky113da642012-06-15 07:52:49 +03002034 ElementObject *cur_parent;
2035 Py_ssize_t child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002036
2037 while (1) {
2038 /* Handle the case reached in the beginning and end of iteration, where
2039 * the parent stack is empty. The root_done flag gives us indication
2040 * whether we've just started iterating (so root_done is 0), in which
2041 * case the root is returned. If root_done is 1 and we're here, the
2042 * iterator is exhausted.
2043 */
2044 if (!it->parent_stack->parent) {
2045 if (it->root_done) {
2046 PyErr_SetNone(PyExc_StopIteration);
2047 return NULL;
2048 } else {
2049 it->parent_stack = parent_stack_push_new(it->parent_stack,
2050 it->root_element);
2051 if (!it->parent_stack) {
2052 PyErr_NoMemory();
2053 return NULL;
2054 }
2055
2056 it->root_done = 1;
2057 if (it->sought_tag == Py_None ||
2058 PyObject_RichCompareBool(it->root_element->tag,
2059 it->sought_tag, Py_EQ) == 1) {
2060 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002061 PyObject *text = element_get_text(it->root_element);
2062 if (!text)
2063 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002064 if (PyObject_IsTrue(text)) {
2065 Py_INCREF(text);
2066 return text;
2067 }
2068 } else {
2069 Py_INCREF(it->root_element);
2070 return (PyObject *)it->root_element;
2071 }
2072 }
2073 }
2074 }
2075
2076 /* See if there are children left to traverse in the current parent. If
2077 * yes, visit the next child. If not, pop the stack and try again.
2078 */
Eli Bendersky113da642012-06-15 07:52:49 +03002079 cur_parent = it->parent_stack->parent;
2080 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002081 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2082 ElementObject *child = (ElementObject *)
2083 cur_parent->extra->children[child_index];
2084 it->parent_stack->child_index++;
2085 it->parent_stack = parent_stack_push_new(it->parent_stack,
2086 child);
2087 if (!it->parent_stack) {
2088 PyErr_NoMemory();
2089 return NULL;
2090 }
2091
2092 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002093 PyObject *text = element_get_text(child);
2094 if (!text)
2095 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002096 if (PyObject_IsTrue(text)) {
2097 Py_INCREF(text);
2098 return text;
2099 }
2100 } else if (it->sought_tag == Py_None ||
2101 PyObject_RichCompareBool(child->tag,
2102 it->sought_tag, Py_EQ) == 1) {
2103 Py_INCREF(child);
2104 return (PyObject *)child;
2105 }
2106 else
2107 continue;
2108 }
2109 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002110 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002111 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002112 if (it->gettext) {
2113 tail = element_get_tail(cur_parent);
2114 if (!tail)
2115 return NULL;
2116 }
2117 else
2118 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119 Py_XDECREF(it->parent_stack->parent);
2120 PyObject_Free(it->parent_stack);
2121 it->parent_stack = next;
2122
2123 /* Note that extra condition on it->parent_stack->parent here;
2124 * this is because itertext() is supposed to only return *inner*
2125 * text, not text following the element it began iteration with.
2126 */
2127 if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
2128 Py_INCREF(tail);
2129 return tail;
2130 }
2131 }
2132 }
2133
2134 return NULL;
2135}
2136
2137
2138static PyTypeObject ElementIter_Type = {
2139 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002140 /* Using the module's name since the pure-Python implementation does not
2141 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002142 "_elementtree._element_iterator", /* tp_name */
2143 sizeof(ElementIterObject), /* tp_basicsize */
2144 0, /* tp_itemsize */
2145 /* methods */
2146 (destructor)elementiter_dealloc, /* tp_dealloc */
2147 0, /* tp_print */
2148 0, /* tp_getattr */
2149 0, /* tp_setattr */
2150 0, /* tp_reserved */
2151 0, /* tp_repr */
2152 0, /* tp_as_number */
2153 0, /* tp_as_sequence */
2154 0, /* tp_as_mapping */
2155 0, /* tp_hash */
2156 0, /* tp_call */
2157 0, /* tp_str */
2158 0, /* tp_getattro */
2159 0, /* tp_setattro */
2160 0, /* tp_as_buffer */
2161 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2162 0, /* tp_doc */
2163 (traverseproc)elementiter_traverse, /* tp_traverse */
2164 0, /* tp_clear */
2165 0, /* tp_richcompare */
2166 0, /* tp_weaklistoffset */
2167 PyObject_SelfIter, /* tp_iter */
2168 (iternextfunc)elementiter_next, /* tp_iternext */
2169 0, /* tp_methods */
2170 0, /* tp_members */
2171 0, /* tp_getset */
2172 0, /* tp_base */
2173 0, /* tp_dict */
2174 0, /* tp_descr_get */
2175 0, /* tp_descr_set */
2176 0, /* tp_dictoffset */
2177 0, /* tp_init */
2178 0, /* tp_alloc */
2179 0, /* tp_new */
2180};
2181
2182
2183static PyObject *
2184create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2185{
2186 ElementIterObject *it;
2187 PyObject *star = NULL;
2188
2189 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2190 if (!it)
2191 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002192
2193 if (PyUnicode_Check(tag))
2194 star = PyUnicode_FromString("*");
2195 else if (PyBytes_Check(tag))
2196 star = PyBytes_FromString("*");
2197
2198 if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
2199 tag = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200 Py_XDECREF(star);
Victor Stinner4d463432013-07-11 23:05:03 +02002201
2202 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002203 it->sought_tag = tag;
2204 it->root_done = 0;
2205 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002206 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002207 it->root_element = self;
2208
Eli Bendersky64d11e62012-06-15 07:42:50 +03002209 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002210
2211 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2212 if (it->parent_stack == NULL) {
2213 Py_DECREF(it);
2214 PyErr_NoMemory();
2215 return NULL;
2216 }
2217 it->parent_stack->parent = NULL;
2218 it->parent_stack->child_index = 0;
2219 it->parent_stack->next = NULL;
2220
Eli Bendersky64d11e62012-06-15 07:42:50 +03002221 return (PyObject *)it;
2222}
2223
2224
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002225/* ==================================================================== */
2226/* the tree builder type */
2227
2228typedef struct {
2229 PyObject_HEAD
2230
Eli Bendersky58d548d2012-05-29 15:45:16 +03002231 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002232
Antoine Pitrouee329312012-10-04 19:53:29 +02002233 PyObject *this; /* current node */
2234 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002235
Eli Bendersky58d548d2012-05-29 15:45:16 +03002236 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002237
Eli Bendersky58d548d2012-05-29 15:45:16 +03002238 PyObject *stack; /* element stack */
2239 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002240
Eli Bendersky48d358b2012-05-30 17:57:50 +03002241 PyObject *element_factory;
2242
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002243 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002244 PyObject *events; /* list of events, or NULL if not collecting */
2245 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2246 PyObject *end_event_obj;
2247 PyObject *start_ns_event_obj;
2248 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002249} TreeBuilderObject;
2250
Christian Heimes90aa7642007-12-19 02:45:37 +00002251#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002252
2253/* -------------------------------------------------------------------- */
2254/* constructor and destructor */
2255
Eli Bendersky58d548d2012-05-29 15:45:16 +03002256static PyObject *
2257treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002258{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002259 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2260 if (t != NULL) {
2261 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002262
Eli Bendersky58d548d2012-05-29 15:45:16 +03002263 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002264 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002265 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002266 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002267
Eli Bendersky58d548d2012-05-29 15:45:16 +03002268 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002269 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002270 t->stack = PyList_New(20);
2271 if (!t->stack) {
2272 Py_DECREF(t->this);
2273 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002274 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002275 return NULL;
2276 }
2277 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002278
Eli Bendersky58d548d2012-05-29 15:45:16 +03002279 t->events = NULL;
2280 t->start_event_obj = t->end_event_obj = NULL;
2281 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2282 }
2283 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002284}
2285
Serhiy Storchakacb985562015-05-04 15:32:48 +03002286/*[clinic input]
2287_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288
Serhiy Storchakacb985562015-05-04 15:32:48 +03002289 element_factory: object = NULL
2290
2291[clinic start generated code]*/
2292
2293static int
2294_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2295 PyObject *element_factory)
2296/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2297{
2298 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002299
2300 if (element_factory) {
2301 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002302 tmp = self->element_factory;
2303 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002304 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002305 }
2306
Eli Bendersky58d548d2012-05-29 15:45:16 +03002307 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002308}
2309
Eli Bendersky48d358b2012-05-30 17:57:50 +03002310static int
2311treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2312{
2313 Py_VISIT(self->root);
2314 Py_VISIT(self->this);
2315 Py_VISIT(self->last);
2316 Py_VISIT(self->data);
2317 Py_VISIT(self->stack);
2318 Py_VISIT(self->element_factory);
2319 return 0;
2320}
2321
2322static int
2323treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002324{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002325 Py_CLEAR(self->end_ns_event_obj);
2326 Py_CLEAR(self->start_ns_event_obj);
2327 Py_CLEAR(self->end_event_obj);
2328 Py_CLEAR(self->start_event_obj);
2329 Py_CLEAR(self->events);
2330 Py_CLEAR(self->stack);
2331 Py_CLEAR(self->data);
2332 Py_CLEAR(self->last);
2333 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002334 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002335 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002336 return 0;
2337}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338
Eli Bendersky48d358b2012-05-30 17:57:50 +03002339static void
2340treebuilder_dealloc(TreeBuilderObject *self)
2341{
2342 PyObject_GC_UnTrack(self);
2343 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002344 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002345}
2346
2347/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002348/* helpers for handling of arbitrary element-like objects */
2349
2350static int
2351treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2352 PyObject **dest, _Py_Identifier *name)
2353{
2354 if (Element_CheckExact(element)) {
2355 Py_DECREF(JOIN_OBJ(*dest));
2356 *dest = JOIN_SET(data, PyList_CheckExact(data));
2357 return 0;
2358 }
2359 else {
2360 PyObject *joined = list_join(data);
2361 int r;
2362 if (joined == NULL)
2363 return -1;
2364 r = _PyObject_SetAttrId(element, name, joined);
2365 Py_DECREF(joined);
2366 return r;
2367 }
2368}
2369
2370/* These two functions steal a reference to data */
2371static int
2372treebuilder_set_element_text(PyObject *element, PyObject *data)
2373{
2374 _Py_IDENTIFIER(text);
2375 return treebuilder_set_element_text_or_tail(
2376 element, data, &((ElementObject *) element)->text, &PyId_text);
2377}
2378
2379static int
2380treebuilder_set_element_tail(PyObject *element, PyObject *data)
2381{
2382 _Py_IDENTIFIER(tail);
2383 return treebuilder_set_element_text_or_tail(
2384 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2385}
2386
2387static int
2388treebuilder_add_subelement(PyObject *element, PyObject *child)
2389{
2390 _Py_IDENTIFIER(append);
2391 if (Element_CheckExact(element)) {
2392 ElementObject *elem = (ElementObject *) element;
2393 return element_add_subelement(elem, child);
2394 }
2395 else {
2396 PyObject *res;
2397 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2398 if (res == NULL)
2399 return -1;
2400 Py_DECREF(res);
2401 return 0;
2402 }
2403}
2404
2405/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002406/* handlers */
2407
2408LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002409treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2410 PyObject* attrib)
2411{
2412 PyObject* node;
2413 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002414 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002415
2416 if (self->data) {
2417 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002418 if (treebuilder_set_element_text(self->last, self->data))
2419 return NULL;
2420 }
2421 else {
2422 if (treebuilder_set_element_tail(self->last, self->data))
2423 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002424 }
2425 self->data = NULL;
2426 }
2427
Eli Bendersky08231a92013-05-18 15:47:16 -07002428 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002429 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2430 } else {
2431 node = create_new_element(tag, attrib);
2432 }
2433 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002434 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002435 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002436
Antoine Pitrouee329312012-10-04 19:53:29 +02002437 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002438
2439 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002440 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002441 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002442 } else {
2443 if (self->root) {
2444 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002445 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002446 "multiple elements on top level"
2447 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002448 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002449 }
2450 Py_INCREF(node);
2451 self->root = node;
2452 }
2453
2454 if (self->index < PyList_GET_SIZE(self->stack)) {
2455 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002456 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002457 Py_INCREF(this);
2458 } else {
2459 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002460 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002461 }
2462 self->index++;
2463
2464 Py_DECREF(this);
2465 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002466 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002467
2468 Py_DECREF(self->last);
2469 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002470 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002471
2472 if (self->start_event_obj) {
2473 PyObject* res;
2474 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002475 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002476 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002477 PyList_Append(self->events, res);
2478 Py_DECREF(res);
2479 } else
2480 PyErr_Clear(); /* FIXME: propagate error */
2481 }
2482
2483 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002484
2485 error:
2486 Py_DECREF(node);
2487 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488}
2489
2490LOCAL(PyObject*)
2491treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2492{
2493 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002494 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002495 /* ignore calls to data before the first call to start */
2496 Py_RETURN_NONE;
2497 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498 /* store the first item as is */
2499 Py_INCREF(data); self->data = data;
2500 } else {
2501 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002502 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2503 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002504 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 /* expat often generates single character data sections; handle
2506 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002507 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2508 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002510 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511 } else if (PyList_CheckExact(self->data)) {
2512 if (PyList_Append(self->data, data) < 0)
2513 return NULL;
2514 } else {
2515 PyObject* list = PyList_New(2);
2516 if (!list)
2517 return NULL;
2518 PyList_SET_ITEM(list, 0, self->data);
2519 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2520 self->data = list;
2521 }
2522 }
2523
2524 Py_RETURN_NONE;
2525}
2526
2527LOCAL(PyObject*)
2528treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2529{
2530 PyObject* item;
2531
2532 if (self->data) {
2533 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002534 if (treebuilder_set_element_text(self->last, self->data))
2535 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002536 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002537 if (treebuilder_set_element_tail(self->last, self->data))
2538 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 }
2540 self->data = NULL;
2541 }
2542
2543 if (self->index == 0) {
2544 PyErr_SetString(
2545 PyExc_IndexError,
2546 "pop from empty stack"
2547 );
2548 return NULL;
2549 }
2550
2551 self->index--;
2552
2553 item = PyList_GET_ITEM(self->stack, self->index);
2554 Py_INCREF(item);
2555
2556 Py_DECREF(self->last);
2557
Antoine Pitrouee329312012-10-04 19:53:29 +02002558 self->last = self->this;
2559 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560
2561 if (self->end_event_obj) {
2562 PyObject* res;
2563 PyObject* action = self->end_event_obj;
2564 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002565 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567 PyList_Append(self->events, res);
2568 Py_DECREF(res);
2569 } else
2570 PyErr_Clear(); /* FIXME: propagate error */
2571 }
2572
2573 Py_INCREF(self->last);
2574 return (PyObject*) self->last;
2575}
2576
2577LOCAL(void)
2578treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002579 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002580{
2581 PyObject* res;
2582 PyObject* action;
2583 PyObject* parcel;
2584
2585 if (!self->events)
2586 return;
2587
2588 if (start) {
2589 if (!self->start_ns_event_obj)
2590 return;
2591 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002592 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002593 if (!parcel)
2594 return;
2595 Py_INCREF(action);
2596 } else {
2597 if (!self->end_ns_event_obj)
2598 return;
2599 action = self->end_ns_event_obj;
2600 Py_INCREF(action);
2601 parcel = Py_None;
2602 Py_INCREF(parcel);
2603 }
2604
2605 res = PyTuple_New(2);
2606
2607 if (res) {
2608 PyTuple_SET_ITEM(res, 0, action);
2609 PyTuple_SET_ITEM(res, 1, parcel);
2610 PyList_Append(self->events, res);
2611 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002612 }
2613 else {
2614 Py_DECREF(action);
2615 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002617 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618}
2619
2620/* -------------------------------------------------------------------- */
2621/* methods (in alphabetical order) */
2622
Serhiy Storchakacb985562015-05-04 15:32:48 +03002623/*[clinic input]
2624_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625
Serhiy Storchakacb985562015-05-04 15:32:48 +03002626 data: object
2627 /
2628
2629[clinic start generated code]*/
2630
2631static PyObject *
2632_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2633/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2634{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635 return treebuilder_handle_data(self, data);
2636}
2637
Serhiy Storchakacb985562015-05-04 15:32:48 +03002638/*[clinic input]
2639_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002640
Serhiy Storchakacb985562015-05-04 15:32:48 +03002641 tag: object
2642 /
2643
2644[clinic start generated code]*/
2645
2646static PyObject *
2647_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2648/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2649{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002650 return treebuilder_handle_end(self, tag);
2651}
2652
2653LOCAL(PyObject*)
2654treebuilder_done(TreeBuilderObject* self)
2655{
2656 PyObject* res;
2657
2658 /* FIXME: check stack size? */
2659
2660 if (self->root)
2661 res = self->root;
2662 else
2663 res = Py_None;
2664
2665 Py_INCREF(res);
2666 return res;
2667}
2668
Serhiy Storchakacb985562015-05-04 15:32:48 +03002669/*[clinic input]
2670_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002671
Serhiy Storchakacb985562015-05-04 15:32:48 +03002672[clinic start generated code]*/
2673
2674static PyObject *
2675_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2676/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2677{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002678 return treebuilder_done(self);
2679}
2680
Serhiy Storchakacb985562015-05-04 15:32:48 +03002681/*[clinic input]
2682_elementtree.TreeBuilder.start
2683
2684 tag: object
2685 attrs: object = None
2686 /
2687
2688[clinic start generated code]*/
2689
2690static PyObject *
2691_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2692 PyObject *attrs)
2693/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002695 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002696}
2697
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002698/* ==================================================================== */
2699/* the expat interface */
2700
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002702#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002703
2704/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2705 * cached globally without being in per-module state.
2706 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002707static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002708#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709
Eli Bendersky52467b12012-06-01 07:13:08 +03002710static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2711 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2712
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002713typedef struct {
2714 PyObject_HEAD
2715
2716 XML_Parser parser;
2717
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002718 PyObject *target;
2719 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002720
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002721 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002723 PyObject *handle_start;
2724 PyObject *handle_data;
2725 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002727 PyObject *handle_comment;
2728 PyObject *handle_pi;
2729 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002731 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002732
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733} XMLParserObject;
2734
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002735#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
2736
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002737/* helpers */
2738
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739LOCAL(PyObject*)
2740makeuniversal(XMLParserObject* self, const char* string)
2741{
2742 /* convert a UTF-8 tag/attribute name from the expat parser
2743 to a universal name string */
2744
Antoine Pitrouc1948842012-10-01 23:40:37 +02002745 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746 PyObject* key;
2747 PyObject* value;
2748
2749 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002750 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751 if (!key)
2752 return NULL;
2753
2754 value = PyDict_GetItem(self->names, key);
2755
2756 if (value) {
2757 Py_INCREF(value);
2758 } else {
2759 /* new name. convert to universal name, and decode as
2760 necessary */
2761
2762 PyObject* tag;
2763 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002764 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002765
2766 /* look for namespace separator */
2767 for (i = 0; i < size; i++)
2768 if (string[i] == '}')
2769 break;
2770 if (i != size) {
2771 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002772 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002773 if (tag == NULL) {
2774 Py_DECREF(key);
2775 return NULL;
2776 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002777 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778 p[0] = '{';
2779 memcpy(p+1, string, size);
2780 size++;
2781 } else {
2782 /* plain name; use key as tag */
2783 Py_INCREF(key);
2784 tag = key;
2785 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002786
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002787 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002788 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002789 value = PyUnicode_DecodeUTF8(p, size, "strict");
2790 Py_DECREF(tag);
2791 if (!value) {
2792 Py_DECREF(key);
2793 return NULL;
2794 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795
2796 /* add to names dictionary */
2797 if (PyDict_SetItem(self->names, key, value) < 0) {
2798 Py_DECREF(key);
2799 Py_DECREF(value);
2800 return NULL;
2801 }
2802 }
2803
2804 Py_DECREF(key);
2805 return value;
2806}
2807
Eli Bendersky5b77d812012-03-16 08:20:05 +02002808/* Set the ParseError exception with the given parameters.
2809 * If message is not NULL, it's used as the error string. Otherwise, the
2810 * message string is the default for the given error_code.
2811*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002812static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002813expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2814 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002815{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002816 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002817 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002818
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002819 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002820 message ? message : EXPAT(ErrorString)(error_code),
2821 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002822 if (errmsg == NULL)
2823 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002824
Eli Bendersky532d03e2013-08-10 08:00:39 -07002825 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002826 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002827 if (!error)
2828 return;
2829
Eli Bendersky5b77d812012-03-16 08:20:05 +02002830 /* Add code and position attributes */
2831 code = PyLong_FromLong((long)error_code);
2832 if (!code) {
2833 Py_DECREF(error);
2834 return;
2835 }
2836 if (PyObject_SetAttrString(error, "code", code) == -1) {
2837 Py_DECREF(error);
2838 Py_DECREF(code);
2839 return;
2840 }
2841 Py_DECREF(code);
2842
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002843 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002844 if (!position) {
2845 Py_DECREF(error);
2846 return;
2847 }
2848 if (PyObject_SetAttrString(error, "position", position) == -1) {
2849 Py_DECREF(error);
2850 Py_DECREF(position);
2851 return;
2852 }
2853 Py_DECREF(position);
2854
Eli Bendersky532d03e2013-08-10 08:00:39 -07002855 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002856 Py_DECREF(error);
2857}
2858
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002859/* -------------------------------------------------------------------- */
2860/* handlers */
2861
2862static void
2863expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2864 int data_len)
2865{
2866 PyObject* key;
2867 PyObject* value;
2868 PyObject* res;
2869
2870 if (data_len < 2 || data_in[0] != '&')
2871 return;
2872
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002873 if (PyErr_Occurred())
2874 return;
2875
Neal Norwitz0269b912007-08-08 06:56:02 +00002876 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002877 if (!key)
2878 return;
2879
2880 value = PyDict_GetItem(self->entity, key);
2881
2882 if (value) {
2883 if (TreeBuilder_CheckExact(self->target))
2884 res = treebuilder_handle_data(
2885 (TreeBuilderObject*) self->target, value
2886 );
2887 else if (self->handle_data)
2888 res = PyObject_CallFunction(self->handle_data, "O", value);
2889 else
2890 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002891 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002892 } else if (!PyErr_Occurred()) {
2893 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002894 char message[128] = "undefined entity ";
2895 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002896 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002897 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002898 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002899 EXPAT(GetErrorColumnNumber)(self->parser),
2900 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002901 );
2902 }
2903
2904 Py_DECREF(key);
2905}
2906
2907static void
2908expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2909 const XML_Char **attrib_in)
2910{
2911 PyObject* res;
2912 PyObject* tag;
2913 PyObject* attrib;
2914 int ok;
2915
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002916 if (PyErr_Occurred())
2917 return;
2918
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002919 /* tag name */
2920 tag = makeuniversal(self, tag_in);
2921 if (!tag)
2922 return; /* parser will look for errors */
2923
2924 /* attributes */
2925 if (attrib_in[0]) {
2926 attrib = PyDict_New();
2927 if (!attrib)
2928 return;
2929 while (attrib_in[0] && attrib_in[1]) {
2930 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002931 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002932 if (!key || !value) {
2933 Py_XDECREF(value);
2934 Py_XDECREF(key);
2935 Py_DECREF(attrib);
2936 return;
2937 }
2938 ok = PyDict_SetItem(attrib, key, value);
2939 Py_DECREF(value);
2940 Py_DECREF(key);
2941 if (ok < 0) {
2942 Py_DECREF(attrib);
2943 return;
2944 }
2945 attrib_in += 2;
2946 }
2947 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002948 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002949 attrib = PyDict_New();
2950 if (!attrib)
2951 return;
2952 }
2953
2954 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 /* shortcut */
2956 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2957 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002958 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002959 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002960 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002961 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002962 res = NULL;
2963
2964 Py_DECREF(tag);
2965 Py_DECREF(attrib);
2966
2967 Py_XDECREF(res);
2968}
2969
2970static void
2971expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2972 int data_len)
2973{
2974 PyObject* data;
2975 PyObject* res;
2976
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002977 if (PyErr_Occurred())
2978 return;
2979
Neal Norwitz0269b912007-08-08 06:56:02 +00002980 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002981 if (!data)
2982 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002983
2984 if (TreeBuilder_CheckExact(self->target))
2985 /* shortcut */
2986 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
2987 else if (self->handle_data)
2988 res = PyObject_CallFunction(self->handle_data, "O", data);
2989 else
2990 res = NULL;
2991
2992 Py_DECREF(data);
2993
2994 Py_XDECREF(res);
2995}
2996
2997static void
2998expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
2999{
3000 PyObject* tag;
3001 PyObject* res = NULL;
3002
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003003 if (PyErr_Occurred())
3004 return;
3005
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003006 if (TreeBuilder_CheckExact(self->target))
3007 /* shortcut */
3008 /* the standard tree builder doesn't look at the end tag */
3009 res = treebuilder_handle_end(
3010 (TreeBuilderObject*) self->target, Py_None
3011 );
3012 else if (self->handle_end) {
3013 tag = makeuniversal(self, tag_in);
3014 if (tag) {
3015 res = PyObject_CallFunction(self->handle_end, "O", tag);
3016 Py_DECREF(tag);
3017 }
3018 }
3019
3020 Py_XDECREF(res);
3021}
3022
3023static void
3024expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3025 const XML_Char *uri)
3026{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003027 PyObject* sprefix = NULL;
3028 PyObject* suri = NULL;
3029
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003030 if (PyErr_Occurred())
3031 return;
3032
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003033 if (uri)
Eli Bendersky4b795182013-11-28 06:33:21 -08003034 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003035 else
Eli Bendersky4b795182013-11-28 06:33:21 -08003036 suri = PyUnicode_FromString("");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003037 if (!suri)
3038 return;
3039
3040 if (prefix)
3041 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3042 else
3043 sprefix = PyUnicode_FromString("");
3044 if (!sprefix) {
3045 Py_DECREF(suri);
3046 return;
3047 }
3048
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003049 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003050 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003051 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003052
3053 Py_DECREF(sprefix);
3054 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003055}
3056
3057static void
3058expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3059{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003060 if (PyErr_Occurred())
3061 return;
3062
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003063 treebuilder_handle_namespace(
3064 (TreeBuilderObject*) self->target, 0, NULL, NULL
3065 );
3066}
3067
3068static void
3069expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3070{
3071 PyObject* comment;
3072 PyObject* res;
3073
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003074 if (PyErr_Occurred())
3075 return;
3076
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003077 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003078 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079 if (comment) {
3080 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3081 Py_XDECREF(res);
3082 Py_DECREF(comment);
3083 }
3084 }
3085}
3086
Eli Bendersky45839902013-01-13 05:14:47 -08003087static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003088expat_start_doctype_handler(XMLParserObject *self,
3089 const XML_Char *doctype_name,
3090 const XML_Char *sysid,
3091 const XML_Char *pubid,
3092 int has_internal_subset)
3093{
3094 PyObject *self_pyobj = (PyObject *)self;
3095 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3096 PyObject *parser_doctype = NULL;
3097 PyObject *res = NULL;
3098
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003099 if (PyErr_Occurred())
3100 return;
3101
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003102 doctype_name_obj = makeuniversal(self, doctype_name);
3103 if (!doctype_name_obj)
3104 return;
3105
3106 if (sysid) {
3107 sysid_obj = makeuniversal(self, sysid);
3108 if (!sysid_obj) {
3109 Py_DECREF(doctype_name_obj);
3110 return;
3111 }
3112 } else {
3113 Py_INCREF(Py_None);
3114 sysid_obj = Py_None;
3115 }
3116
3117 if (pubid) {
3118 pubid_obj = makeuniversal(self, pubid);
3119 if (!pubid_obj) {
3120 Py_DECREF(doctype_name_obj);
3121 Py_DECREF(sysid_obj);
3122 return;
3123 }
3124 } else {
3125 Py_INCREF(Py_None);
3126 pubid_obj = Py_None;
3127 }
3128
3129 /* If the target has a handler for doctype, call it. */
3130 if (self->handle_doctype) {
3131 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3132 doctype_name_obj, pubid_obj, sysid_obj);
3133 Py_CLEAR(res);
3134 }
3135
3136 /* Now see if the parser itself has a doctype method. If yes and it's
3137 * a subclass, call it but warn about deprecation. If it's not a subclass
3138 * (i.e. vanilla XMLParser), do nothing.
3139 */
3140 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3141 if (parser_doctype) {
3142 if (!XMLParser_CheckExact(self_pyobj)) {
3143 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3144 "This method of XMLParser is deprecated. Define"
3145 " doctype() method on the TreeBuilder target.",
3146 1) < 0) {
3147 goto clear;
3148 }
3149 res = PyObject_CallFunction(parser_doctype, "OOO",
3150 doctype_name_obj, pubid_obj, sysid_obj);
3151 Py_CLEAR(res);
3152 }
3153 }
3154
3155clear:
3156 Py_XDECREF(parser_doctype);
3157 Py_DECREF(doctype_name_obj);
3158 Py_DECREF(pubid_obj);
3159 Py_DECREF(sysid_obj);
3160}
3161
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003162static void
3163expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3164 const XML_Char* data_in)
3165{
3166 PyObject* target;
3167 PyObject* data;
3168 PyObject* res;
3169
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003170 if (PyErr_Occurred())
3171 return;
3172
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003173 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003174 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3175 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003176 if (target && data) {
3177 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3178 Py_XDECREF(res);
3179 Py_DECREF(data);
3180 Py_DECREF(target);
3181 } else {
3182 Py_XDECREF(data);
3183 Py_XDECREF(target);
3184 }
3185 }
3186}
3187
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003188/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003189
Eli Bendersky52467b12012-06-01 07:13:08 +03003190static PyObject *
3191xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003192{
Eli Bendersky52467b12012-06-01 07:13:08 +03003193 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3194 if (self) {
3195 self->parser = NULL;
3196 self->target = self->entity = self->names = NULL;
3197 self->handle_start = self->handle_data = self->handle_end = NULL;
3198 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003199 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003200 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003201 return (PyObject *)self;
3202}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003203
Serhiy Storchakacb985562015-05-04 15:32:48 +03003204/*[clinic input]
3205_elementtree.XMLParser.__init__
3206
3207 html: object = NULL
3208 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003209 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003210
3211[clinic start generated code]*/
3212
Eli Bendersky52467b12012-06-01 07:13:08 +03003213static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003214_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3215 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003216/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003217{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003218 self->entity = PyDict_New();
3219 if (!self->entity)
3220 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221
Serhiy Storchakacb985562015-05-04 15:32:48 +03003222 self->names = PyDict_New();
3223 if (!self->names) {
3224 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003225 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003227
Serhiy Storchakacb985562015-05-04 15:32:48 +03003228 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3229 if (!self->parser) {
3230 Py_CLEAR(self->entity);
3231 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003233 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003234 }
3235
Eli Bendersky52467b12012-06-01 07:13:08 +03003236 if (target) {
3237 Py_INCREF(target);
3238 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003239 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003240 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003241 Py_CLEAR(self->entity);
3242 Py_CLEAR(self->names);
3243 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003244 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003246 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003247 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248
Serhiy Storchakacb985562015-05-04 15:32:48 +03003249 self->handle_start = PyObject_GetAttrString(target, "start");
3250 self->handle_data = PyObject_GetAttrString(target, "data");
3251 self->handle_end = PyObject_GetAttrString(target, "end");
3252 self->handle_comment = PyObject_GetAttrString(target, "comment");
3253 self->handle_pi = PyObject_GetAttrString(target, "pi");
3254 self->handle_close = PyObject_GetAttrString(target, "close");
3255 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256
3257 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003258
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003260 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003262 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 (XML_StartElementHandler) expat_start_handler,
3264 (XML_EndElementHandler) expat_end_handler
3265 );
3266 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003267 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 (XML_DefaultHandler) expat_default_handler
3269 );
3270 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003271 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 (XML_CharacterDataHandler) expat_data_handler
3273 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003274 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003276 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277 (XML_CommentHandler) expat_comment_handler
3278 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003279 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003280 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003281 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 (XML_ProcessingInstructionHandler) expat_pi_handler
3283 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003284 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003285 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003286 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3287 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003289 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003290 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292
Eli Bendersky52467b12012-06-01 07:13:08 +03003293 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003294}
3295
Eli Bendersky52467b12012-06-01 07:13:08 +03003296static int
3297xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3298{
3299 Py_VISIT(self->handle_close);
3300 Py_VISIT(self->handle_pi);
3301 Py_VISIT(self->handle_comment);
3302 Py_VISIT(self->handle_end);
3303 Py_VISIT(self->handle_data);
3304 Py_VISIT(self->handle_start);
3305
3306 Py_VISIT(self->target);
3307 Py_VISIT(self->entity);
3308 Py_VISIT(self->names);
3309
3310 return 0;
3311}
3312
3313static int
3314xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315{
3316 EXPAT(ParserFree)(self->parser);
3317
Antoine Pitrouc1948842012-10-01 23:40:37 +02003318 Py_CLEAR(self->handle_close);
3319 Py_CLEAR(self->handle_pi);
3320 Py_CLEAR(self->handle_comment);
3321 Py_CLEAR(self->handle_end);
3322 Py_CLEAR(self->handle_data);
3323 Py_CLEAR(self->handle_start);
3324 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003325
Antoine Pitrouc1948842012-10-01 23:40:37 +02003326 Py_CLEAR(self->target);
3327 Py_CLEAR(self->entity);
3328 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003329
Eli Bendersky52467b12012-06-01 07:13:08 +03003330 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331}
3332
Eli Bendersky52467b12012-06-01 07:13:08 +03003333static void
3334xmlparser_dealloc(XMLParserObject* self)
3335{
3336 PyObject_GC_UnTrack(self);
3337 xmlparser_gc_clear(self);
3338 Py_TYPE(self)->tp_free((PyObject *)self);
3339}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003340
3341LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003342expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003343{
3344 int ok;
3345
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003346 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3348
3349 if (PyErr_Occurred())
3350 return NULL;
3351
3352 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003353 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003354 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003355 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003356 EXPAT(GetErrorColumnNumber)(self->parser),
3357 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358 );
3359 return NULL;
3360 }
3361
3362 Py_RETURN_NONE;
3363}
3364
Serhiy Storchakacb985562015-05-04 15:32:48 +03003365/*[clinic input]
3366_elementtree.XMLParser.close
3367
3368[clinic start generated code]*/
3369
3370static PyObject *
3371_elementtree_XMLParser_close_impl(XMLParserObject *self)
3372/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003373{
3374 /* end feeding data to parser */
3375
3376 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003377 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003378 if (!res)
3379 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003380
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003381 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003382 Py_DECREF(res);
3383 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003384 }
3385 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003386 Py_DECREF(res);
3387 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003388 }
3389 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003390 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003391 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003392}
3393
Serhiy Storchakacb985562015-05-04 15:32:48 +03003394/*[clinic input]
3395_elementtree.XMLParser.feed
3396
3397 data: object
3398 /
3399
3400[clinic start generated code]*/
3401
3402static PyObject *
3403_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3404/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405{
3406 /* feed data to parser */
3407
Serhiy Storchakacb985562015-05-04 15:32:48 +03003408 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003409 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003410 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3411 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003412 return NULL;
3413 if (data_len > INT_MAX) {
3414 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3415 return NULL;
3416 }
3417 /* Explicitly set UTF-8 encoding. Return code ignored. */
3418 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003419 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003420 }
3421 else {
3422 Py_buffer view;
3423 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003424 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003425 return NULL;
3426 if (view.len > INT_MAX) {
3427 PyBuffer_Release(&view);
3428 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3429 return NULL;
3430 }
3431 res = expat_parse(self, view.buf, (int)view.len, 0);
3432 PyBuffer_Release(&view);
3433 return res;
3434 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003435}
3436
Serhiy Storchakacb985562015-05-04 15:32:48 +03003437/*[clinic input]
3438_elementtree.XMLParser._parse_whole
3439
3440 file: object
3441 /
3442
3443[clinic start generated code]*/
3444
3445static PyObject *
3446_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3447/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003448{
Eli Benderskya3699232013-05-19 18:47:23 -07003449 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003450 PyObject* reader;
3451 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003452 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003453 PyObject* res;
3454
Serhiy Storchakacb985562015-05-04 15:32:48 +03003455 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456 if (!reader)
3457 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003458
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003459 /* read from open file object */
3460 for (;;) {
3461
3462 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3463
3464 if (!buffer) {
3465 /* read failed (e.g. due to KeyboardInterrupt) */
3466 Py_DECREF(reader);
3467 return NULL;
3468 }
3469
Eli Benderskyf996e772012-03-16 05:53:30 +02003470 if (PyUnicode_CheckExact(buffer)) {
3471 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003472 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003473 Py_DECREF(buffer);
3474 break;
3475 }
3476 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003477 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003478 if (!temp) {
3479 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003480 Py_DECREF(reader);
3481 return NULL;
3482 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003483 buffer = temp;
3484 }
3485 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003486 Py_DECREF(buffer);
3487 break;
3488 }
3489
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003490 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3491 Py_DECREF(buffer);
3492 Py_DECREF(reader);
3493 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3494 return NULL;
3495 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003496 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003497 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 );
3499
3500 Py_DECREF(buffer);
3501
3502 if (!res) {
3503 Py_DECREF(reader);
3504 return NULL;
3505 }
3506 Py_DECREF(res);
3507
3508 }
3509
3510 Py_DECREF(reader);
3511
3512 res = expat_parse(self, "", 0, 1);
3513
3514 if (res && TreeBuilder_CheckExact(self->target)) {
3515 Py_DECREF(res);
3516 return treebuilder_done((TreeBuilderObject*) self->target);
3517 }
3518
3519 return res;
3520}
3521
Serhiy Storchakacb985562015-05-04 15:32:48 +03003522/*[clinic input]
3523_elementtree.XMLParser.doctype
3524
3525[clinic start generated code]*/
3526
3527static PyObject *
3528_elementtree_XMLParser_doctype_impl(XMLParserObject *self)
3529/*[clinic end generated code: output=d09fdb9c45f3a602 input=20d5e0febf902a2f]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003530{
3531 Py_RETURN_NONE;
3532}
3533
Serhiy Storchakacb985562015-05-04 15:32:48 +03003534/*[clinic input]
3535_elementtree.XMLParser._setevents
3536
3537 events_queue: object(subclass_of='&PyList_Type')
3538 events_to_report: object = None
3539 /
3540
3541[clinic start generated code]*/
3542
3543static PyObject *
3544_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3545 PyObject *events_queue,
3546 PyObject *events_to_report)
3547/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003548{
3549 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003550 Py_ssize_t i, seqlen;
3551 TreeBuilderObject *target;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003552 PyObject *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003553
3554 if (!TreeBuilder_CheckExact(self->target)) {
3555 PyErr_SetString(
3556 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003557 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003558 "targets"
3559 );
3560 return NULL;
3561 }
3562
3563 target = (TreeBuilderObject*) self->target;
3564
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003565 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003566 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003567 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003568
3569 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003570 Py_CLEAR(target->start_event_obj);
3571 Py_CLEAR(target->end_event_obj);
3572 Py_CLEAR(target->start_ns_event_obj);
3573 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003574
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003575 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003576 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003577 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578 Py_RETURN_NONE;
3579 }
3580
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003581 if (!(events_seq = PySequence_Fast(events_to_report,
3582 "events must be a sequence"))) {
3583 return NULL;
3584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003585
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003586 seqlen = PySequence_Size(events_seq);
3587 for (i = 0; i < seqlen; ++i) {
3588 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3589 char *event_name = NULL;
3590 if (PyUnicode_Check(event_name_obj)) {
3591 event_name = _PyUnicode_AsString(event_name_obj);
3592 } else if (PyBytes_Check(event_name_obj)) {
3593 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003594 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003595
3596 if (event_name == NULL) {
3597 Py_DECREF(events_seq);
3598 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3599 return NULL;
3600 } else if (strcmp(event_name, "start") == 0) {
3601 Py_INCREF(event_name_obj);
3602 target->start_event_obj = event_name_obj;
3603 } else if (strcmp(event_name, "end") == 0) {
3604 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003606 target->end_event_obj = event_name_obj;
3607 } else if (strcmp(event_name, "start-ns") == 0) {
3608 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003610 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003611 EXPAT(SetNamespaceDeclHandler)(
3612 self->parser,
3613 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3614 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3615 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003616 } else if (strcmp(event_name, "end-ns") == 0) {
3617 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003618 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003619 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003620 EXPAT(SetNamespaceDeclHandler)(
3621 self->parser,
3622 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3623 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3624 );
3625 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003626 Py_DECREF(events_seq);
3627 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003628 return NULL;
3629 }
3630 }
3631
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003632 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003633 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003634}
3635
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003636static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003637xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003639 if (PyUnicode_Check(nameobj)) {
3640 PyObject* res;
3641 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3642 res = self->entity;
3643 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3644 res = self->target;
3645 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3646 return PyUnicode_FromFormat(
3647 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003649 }
3650 else
3651 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003652
Alexander Belopolskye239d232010-12-08 23:31:48 +00003653 Py_INCREF(res);
3654 return res;
3655 }
3656 generic:
3657 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003658}
3659
Serhiy Storchakacb985562015-05-04 15:32:48 +03003660#include "clinic/_elementtree.c.h"
3661
/* Method table for the Element type (installed through tp_methods of
   Element_Type).  Each *_METHODDEF macro expands to one table entry;
   they are presumably provided by the clinic header included above. */
static PyMethodDef element_methods[] = {

    _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF

    /* attribute access */
    _ELEMENTTREE_ELEMENT_GET_METHODDEF
    _ELEMENTTREE_ELEMENT_SET_METHODDEF

    /* searching */
    _ELEMENTTREE_ELEMENT_FIND_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF

    /* child manipulation */
    _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
    _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
    _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
    _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF

    /* iteration */
    _ELEMENTTREE_ELEMENT_ITER_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
    _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF

    /* "getiterator" is a second name bound to the same C function and
       docstring as iter */
    {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
    _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF

    _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
    _ELEMENTTREE_ELEMENT_KEYS_METHODDEF

    _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF

    /* copy / pickle / introspection protocol hooks */
    _ELEMENTTREE_ELEMENT___COPY___METHODDEF
    _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
    _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
    _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
    _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF

    {NULL, NULL}                        /* sentinel */
};
3698
/* Mapping protocol slots for Element (referenced from tp_as_mapping of
   Element_Type). */
static PyMappingMethods element_as_mapping = {
    (lenfunc) element_length,               /* mp_length */
    (binaryfunc) element_subscr,            /* mp_subscript */
    (objobjargproc) element_ass_subscr,     /* mp_ass_subscript */
};
3704
/* Type object for xml.etree.ElementTree.Element.  The type can be
   subclassed (Py_TPFLAGS_BASETYPE), takes part in cyclic GC, and supports
   weak references through ElementObject.weakreflist.  Slots are
   positional, so entry order must not change. */
static PyTypeObject Element_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
    /* methods */
    (destructor)element_dealloc,                    /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    (reprfunc)element_repr,                         /* tp_repr */
    0,                                              /* tp_as_number */
    &element_as_sequence,                           /* tp_as_sequence */
    &element_as_mapping,                            /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)element_getattro,                 /* tp_getattro */
    (setattrofunc)element_setattro,                 /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)element_gc_traverse,              /* tp_traverse */
    (inquiry)element_gc_clear,                      /* tp_clear */
    0,                                              /* tp_richcompare */
    offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    element_methods,                                /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    (initproc)element_init,                         /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    element_new,                                    /* tp_new */
    0,                                              /* tp_free */
};
3746
/* Method table for the TreeBuilder type (installed through tp_methods of
   TreeBuilder_Type).  Each *_METHODDEF macro expands to one entry. */
static PyMethodDef treebuilder_methods[] = {
    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
    {NULL, NULL}                        /* sentinel */
};
3754
/* Type object for xml.etree.ElementTree.TreeBuilder.  The type can be
   subclassed (Py_TPFLAGS_BASETYPE) and takes part in cyclic GC.  Slots
   are positional, so entry order must not change. */
static PyTypeObject TreeBuilder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
    /* methods */
    (destructor)treebuilder_dealloc,                /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    0,                                              /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    treebuilder_methods,                            /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    _elementtree_TreeBuilder___init__,              /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    treebuilder_new,                                /* tp_new */
    0,                                              /* tp_free */
};
3796
/* Method table for the XMLParser type (installed through tp_methods of
   XMLParser_Type).  Each *_METHODDEF macro expands to one entry. */
static PyMethodDef xmlparser_methods[] = {
    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
    _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
    {NULL, NULL}                        /* sentinel */
};
3805
/* Type object for xml.etree.ElementTree.XMLParser.  Attribute reads go
   through xmlparser_getattro; the type can be subclassed
   (Py_TPFLAGS_BASETYPE) and takes part in cyclic GC.  Slots are
   positional, so entry order must not change. */
static PyTypeObject XMLParser_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
    /* methods */
    (destructor)xmlparser_dealloc,                  /* tp_dealloc */
    0,                                              /* tp_print */
    0,                                              /* tp_getattr */
    0,                                              /* tp_setattr */
    0,                                              /* tp_reserved */
    0,                                              /* tp_repr */
    0,                                              /* tp_as_number */
    0,                                              /* tp_as_sequence */
    0,                                              /* tp_as_mapping */
    0,                                              /* tp_hash */
    0,                                              /* tp_call */
    0,                                              /* tp_str */
    (getattrofunc)xmlparser_getattro,               /* tp_getattro */
    0,                                              /* tp_setattro */
    0,                                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                    /* tp_flags */
    0,                                              /* tp_doc */
    (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
    (inquiry)xmlparser_gc_clear,                    /* tp_clear */
    0,                                              /* tp_richcompare */
    0,                                              /* tp_weaklistoffset */
    0,                                              /* tp_iter */
    0,                                              /* tp_iternext */
    xmlparser_methods,                              /* tp_methods */
    0,                                              /* tp_members */
    0,                                              /* tp_getset */
    0,                                              /* tp_base */
    0,                                              /* tp_dict */
    0,                                              /* tp_descr_get */
    0,                                              /* tp_descr_set */
    0,                                              /* tp_dictoffset */
    _elementtree_XMLParser___init__,                /* tp_init */
    PyType_GenericAlloc,                            /* tp_alloc */
    xmlparser_new,                                  /* tp_new */
    0,                                              /* tp_free */
};
3847
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003848/* ==================================================================== */
3849/* python module interface */
3850
/* Module-level functions (passed as the method table of
   elementtreemodule).  SubElement accepts positional and keyword
   arguments and has no docstring entry. */
static PyMethodDef _functions[] = {
    {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
    {NULL, NULL}                        /* sentinel */
};
3855
Martin v. Löwis1a214512008-06-11 05:26:20 +00003856
/* Module definition for _elementtree.  Fields are positional; the
   per-module state block is sized for an elementtreestate and is visited
   and released through the three hooks at the end. */
static struct PyModuleDef elementtreemodule = {
    PyModuleDef_HEAD_INIT,
    "_elementtree",                 /* module name */
    NULL,                           /* no module docstring */
    sizeof(elementtreestate),       /* size of per-module state */
    _functions,                     /* module-level functions */
    NULL,                           /* unused slot (NOTE(review): m_slots or
                                       m_reload depending on the Python
                                       headers -- confirm) */
    elementtree_traverse,           /* GC traverse hook */
    elementtree_clear,              /* GC clear hook */
    elementtree_free                /* free hook */
};
3868
Neal Norwitzf6657e62006-12-28 04:47:50 +00003869PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003870PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003871{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003872 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003873 elementtreestate *st;
3874
3875 m = PyState_FindModule(&elementtreemodule);
3876 if (m) {
3877 Py_INCREF(m);
3878 return m;
3879 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003880
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003881 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003882 if (PyType_Ready(&ElementIter_Type) < 0)
3883 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003884 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003885 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003886 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003887 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003888 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003889 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003890
Eli Bendersky532d03e2013-08-10 08:00:39 -07003891 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003892 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003893 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003894 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003895
Eli Bendersky828efde2012-04-05 05:40:58 +03003896 if (!(temp = PyImport_ImportModule("copy")))
3897 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003898 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003899 Py_XDECREF(temp);
3900
Eli Bendersky532d03e2013-08-10 08:00:39 -07003901 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003902 return NULL;
3903
Eli Bendersky20d41742012-06-01 09:48:37 +03003904 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003905 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3906 if (expat_capi) {
3907 /* check that it's usable */
3908 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003909 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003910 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3911 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003912 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003913 PyErr_SetString(PyExc_ImportError,
3914 "pyexpat version is incompatible");
3915 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003916 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003917 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003918 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003919 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003920
Eli Bendersky532d03e2013-08-10 08:00:39 -07003921 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003922 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003923 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003924 Py_INCREF(st->parseerror_obj);
3925 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003926
Eli Bendersky092af1f2012-03-04 07:14:03 +02003927 Py_INCREF((PyObject *)&Element_Type);
3928 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3929
Eli Bendersky58d548d2012-05-29 15:45:16 +03003930 Py_INCREF((PyObject *)&TreeBuilder_Type);
3931 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3932
Eli Bendersky52467b12012-06-01 07:13:08 +03003933 Py_INCREF((PyObject *)&XMLParser_Type);
3934 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003935
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003936 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003937}