blob: 11d1aece4fa00163518b1516d7cb43626f45820f [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra block;
   an element with at most this many children needs no separate
   allocation for its child array. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a STATIC_CHILDREN value so that 80-90% of
   all nodes have no more than that number of children.  Set it to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the first branch enabled, ALLOC and
 * RELEASE maintain a running byte counter and print it together with the
 * given comment; in the default (second) branch both expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { \
        memory += size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#define RELEASE(size, comment) \
    do { \
        memory -= size; \
        printf("%8d - %s\n", memory, comment); \
    } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* compiler tweaks: LOCAL(type) introduces a file-private definition; the
 * MSVC variant additionally asks for inlining and the __fastcall calling
 * convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The text and tail slots of an element are tagged pointers: the lowest
   bit of the stored value is a 'join' flag and the remaining bits are the
   object pointer.  Every use of text or tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition for
   more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p or'ed with the given flag */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
/* True when op's type is exactly Element_Type (subclasses do not match). */
#define Element_CheckExact(op) (&Element_Type == Py_TYPE(op))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700641 _clear_joined_ptr(&self->text);
642 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643
644 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300645 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 return 0;
649}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655
656 if (self->weakreflist != NULL)
657 PyObject_ClearWeakRefs((PyObject *) self);
658
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 /* element_gc_clear clears all references and deallocates extra
660 */
661 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
663 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200664 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665}
666
667/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669/*[clinic input]
670_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
Serhiy Storchakacb985562015-05-04 15:32:48 +0300672 subelement: object(subclass_of='&Element_Type')
673 /
674
675[clinic start generated code]*/
676
677static PyObject *
678_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
679/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
680{
681 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 return NULL;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element_clear_impl(ElementObject *self)
694/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
695{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300696 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_INCREF(Py_None);
699 Py_DECREF(JOIN_OBJ(self->text));
700 self->text = Py_None;
701
702 Py_INCREF(Py_None);
703 Py_DECREF(JOIN_OBJ(self->tail));
704 self->tail = Py_None;
705
706 Py_RETURN_NONE;
707}
708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709/*[clinic input]
710_elementtree.Element.__copy__
711
712[clinic start generated code]*/
713
714static PyObject *
715_elementtree_Element___copy___impl(ElementObject *self)
716/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200718 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 ElementObject* element;
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800722 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (!element)
724 return NULL;
725
726 Py_DECREF(JOIN_OBJ(element->text));
727 element->text = self->text;
728 Py_INCREF(JOIN_OBJ(element->text));
729
730 Py_DECREF(JOIN_OBJ(element->tail));
731 element->tail = self->tail;
732 Py_INCREF(JOIN_OBJ(element->tail));
733
734 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000735 if (element_resize(element, self->extra->length) < 0) {
736 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000738 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
740 for (i = 0; i < self->extra->length; i++) {
741 Py_INCREF(self->extra->children[i]);
742 element->extra->children[i] = self->extra->children[i];
743 }
744
745 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 }
747
748 return (PyObject*) element;
749}
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
754 memo: object
755 /
756
757[clinic start generated code]*/
758
759static PyObject *
760_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 Py_DECREF(element->text);
798 element->text = JOIN_SET(text, JOIN_GET(self->text));
799
800 tail = deepcopy(JOIN_OBJ(self->tail), memo);
801 if (!tail)
802 goto error;
803 Py_DECREF(element->tail);
804 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
805
806 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 if (element_resize(element, self->extra->length) < 0)
808 goto error;
809
810 for (i = 0; i < self->extra->length; i++) {
811 PyObject* child = deepcopy(self->extra->children[i], memo);
812 if (!child) {
813 element->extra->length = i;
814 goto error;
815 }
816 element->extra->children[i] = child;
817 }
818
819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200823 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchakacb985562015-05-04 15:32:48 +0300841/*[clinic input]
842_elementtree.Element.__sizeof__ -> Py_ssize_t
843
844[clinic start generated code]*/
845
846static Py_ssize_t
847_elementtree_Element___sizeof___impl(ElementObject *self)
848/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200849{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200850 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200851 if (self->extra) {
852 result += sizeof(ElementObjectExtra);
853 if (self->extra->children != self->extra->_children)
854 result += sizeof(PyObject*) * self->extra->allocated;
855 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300856 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200857}
858
/* Keys of the state dict produced by __getstate__ and consumed by
 * __setstate__.
 */
#define PICKLED_TAG         "tag"
#define PICKLED_CHILDREN    "_children"
#define PICKLED_ATTRIB      "attrib"
#define PICKLED_TAIL        "tail"
#define PICKLED_TEXT        "text"
865
866/* __getstate__ returns a fabricated instance dict as in the pure-Python
867 * Element implementation, for interoperability/interchangeability. This
868 * makes the pure-Python implementation details an API, but (a) there aren't
869 * any unnecessary structures there; and (b) it buys compatibility with 3.2
870 * pickles. See issue #16076.
871 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300872/*[clinic input]
873_elementtree.Element.__getstate__
874
875[clinic start generated code]*/
876
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300878_elementtree_Element___getstate___impl(ElementObject *self)
879/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200881 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 PyObject *instancedict = NULL, *children;
883
884 /* Build a list of children. */
885 children = PyList_New(self->extra ? self->extra->length : 0);
886 if (!children)
887 return NULL;
888 for (i = 0; i < PyList_GET_SIZE(children); i++) {
889 PyObject *child = self->extra->children[i];
890 Py_INCREF(child);
891 PyList_SET_ITEM(children, i, child);
892 }
893
894 /* Construct the state object. */
895 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
896 if (noattrib)
897 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
898 PICKLED_TAG, self->tag,
899 PICKLED_CHILDREN, children,
900 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 PICKLED_TEXT, JOIN_OBJ(self->text),
902 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903 else
904 instancedict = Py_BuildValue("{sOsOsOsOsO}",
905 PICKLED_TAG, self->tag,
906 PICKLED_CHILDREN, children,
907 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 PICKLED_TEXT, JOIN_OBJ(self->text),
909 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800910 if (instancedict) {
911 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800913 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 else {
915 for (i = 0; i < PyList_GET_SIZE(children); i++)
916 Py_DECREF(PyList_GET_ITEM(children, i));
917 Py_DECREF(children);
918
919 return NULL;
920 }
921}
922
923static PyObject *
924element_setstate_from_attributes(ElementObject *self,
925 PyObject *tag,
926 PyObject *attrib,
927 PyObject *text,
928 PyObject *tail,
929 PyObject *children)
930{
931 Py_ssize_t i, nchildren;
932
933 if (!tag) {
934 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
935 return NULL;
936 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200938 Py_INCREF(tag);
939 Py_SETREF(self->tag, tag);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800940
Eli Benderskydd3661e2013-09-13 06:24:25 -0700941 _clear_joined_ptr(&self->text);
942 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
943 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800944
Eli Benderskydd3661e2013-09-13 06:24:25 -0700945 _clear_joined_ptr(&self->tail);
946 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
947 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800948
949 /* Handle ATTRIB and CHILDREN. */
950 if (!children && !attrib)
951 Py_RETURN_NONE;
952
953 /* Compute 'nchildren'. */
954 if (children) {
955 if (!PyList_Check(children)) {
956 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
957 return NULL;
958 }
959 nchildren = PyList_Size(children);
960 }
961 else {
962 nchildren = 0;
963 }
964
965 /* Allocate 'extra'. */
966 if (element_resize(self, nchildren)) {
967 return NULL;
968 }
969 assert(self->extra && self->extra->allocated >= nchildren);
970
971 /* Copy children */
972 for (i = 0; i < nchildren; i++) {
973 self->extra->children[i] = PyList_GET_ITEM(children, i);
974 Py_INCREF(self->extra->children[i]);
975 }
976
977 self->extra->length = nchildren;
978 self->extra->allocated = nchildren;
979
980 /* Stash attrib. */
981 if (attrib) {
Eli Bendersky698bdb22013-01-10 06:01:06 -0800982 Py_INCREF(attrib);
Serhiy Storchaka191321d2015-12-27 15:41:34 +0200983 Py_SETREF(self->extra->attrib, attrib);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800984 }
985
986 Py_RETURN_NONE;
987}
988
989/* __setstate__ for Element instance from the Python implementation.
990 * 'state' should be the instance dict.
991 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300992
Eli Bendersky698bdb22013-01-10 06:01:06 -0800993static PyObject *
994element_setstate_from_Python(ElementObject *self, PyObject *state)
995{
996 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
997 PICKLED_TAIL, PICKLED_CHILDREN, 0};
998 PyObject *args;
999 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001000 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001001
Eli Bendersky698bdb22013-01-10 06:01:06 -08001002 tag = attrib = text = tail = children = NULL;
1003 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001004 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001005 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006
1007 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1008 &attrib, &text, &tail, &children))
1009 retval = element_setstate_from_attributes(self, tag, attrib, text,
1010 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001011 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001012 retval = NULL;
1013
1014 Py_DECREF(args);
1015 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001016}
1017
Serhiy Storchakacb985562015-05-04 15:32:48 +03001018/*[clinic input]
1019_elementtree.Element.__setstate__
1020
1021 state: object
1022 /
1023
1024[clinic start generated code]*/
1025
Eli Bendersky698bdb22013-01-10 06:01:06 -08001026static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001027_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1028/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001029{
1030 if (!PyDict_CheckExact(state)) {
1031 PyErr_Format(PyExc_TypeError,
1032 "Don't know how to unpickle \"%.200R\" as an Element",
1033 state);
1034 return NULL;
1035 }
1036 else
1037 return element_setstate_from_Python(self, state);
1038}
1039
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001040LOCAL(int)
1041checkpath(PyObject* tag)
1042{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001043 Py_ssize_t i;
1044 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001045
1046 /* check if a tag contains an xpath character */
1047
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001048#define PATHCHAR(ch) \
1049 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001050
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001051 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001052 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1053 void *data = PyUnicode_DATA(tag);
1054 unsigned int kind = PyUnicode_KIND(tag);
1055 for (i = 0; i < len; i++) {
1056 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1057 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001058 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001059 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 return 1;
1063 }
1064 return 0;
1065 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001066 if (PyBytes_Check(tag)) {
1067 char *p = PyBytes_AS_STRING(tag);
1068 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001069 if (p[i] == '{')
1070 check = 0;
1071 else if (p[i] == '}')
1072 check = 1;
1073 else if (check && PATHCHAR(p[i]))
1074 return 1;
1075 }
1076 return 0;
1077 }
1078
1079 return 1; /* unknown type; might be path expression */
1080}
1081
Serhiy Storchakacb985562015-05-04 15:32:48 +03001082/*[clinic input]
1083_elementtree.Element.extend
1084
1085 elements: object
1086 /
1087
1088[clinic start generated code]*/
1089
1090static PyObject *
1091_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1092/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001093{
1094 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001095 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001096
Serhiy Storchakacb985562015-05-04 15:32:48 +03001097 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098 if (!seq) {
1099 PyErr_Format(
1100 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001101 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001102 );
1103 return NULL;
1104 }
1105
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001106 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001107 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001108 Py_INCREF(element);
1109 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001110 PyErr_Format(
1111 PyExc_TypeError,
1112 "expected an Element, not \"%.200s\"",
1113 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001114 Py_DECREF(seq);
1115 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001116 return NULL;
1117 }
1118
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001119 if (element_add_subelement(self, element) < 0) {
1120 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001121 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001122 return NULL;
1123 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001124 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001125 }
1126
1127 Py_DECREF(seq);
1128
1129 Py_RETURN_NONE;
1130}
1131
Serhiy Storchakacb985562015-05-04 15:32:48 +03001132/*[clinic input]
1133_elementtree.Element.find
1134
1135 path: object
1136 namespaces: object = None
1137
1138[clinic start generated code]*/
1139
1140static PyObject *
1141_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1142 PyObject *namespaces)
1143/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001144{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001145 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001146 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001147
Serhiy Storchakacb985562015-05-04 15:32:48 +03001148 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001149 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001150 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001151 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001153 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154
1155 if (!self->extra)
1156 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001157
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001158 for (i = 0; i < self->extra->length; i++) {
1159 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001160 int rc;
1161 if (!Element_CheckExact(item))
1162 continue;
1163 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001164 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001165 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001166 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 Py_DECREF(item);
1168 if (rc < 0)
1169 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001170 }
1171
1172 Py_RETURN_NONE;
1173}
1174
Serhiy Storchakacb985562015-05-04 15:32:48 +03001175/*[clinic input]
1176_elementtree.Element.findtext
1177
1178 path: object
1179 default: object = None
1180 namespaces: object = None
1181
1182[clinic start generated code]*/
1183
1184static PyObject *
1185_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1186 PyObject *default_value,
1187 PyObject *namespaces)
1188/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001189{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001190 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001191 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001192 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001193
Serhiy Storchakacb985562015-05-04 15:32:48 +03001194 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001197 );
1198
1199 if (!self->extra) {
1200 Py_INCREF(default_value);
1201 return default_value;
1202 }
1203
1204 for (i = 0; i < self->extra->length; i++) {
1205 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001206 int rc;
1207 if (!Element_CheckExact(item))
1208 continue;
1209 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001210 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001211 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001212 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001213 if (text == Py_None) {
1214 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001215 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001216 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001217 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001219 return text;
1220 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001221 Py_DECREF(item);
1222 if (rc < 0)
1223 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 }
1225
1226 Py_INCREF(default_value);
1227 return default_value;
1228}
1229
Serhiy Storchakacb985562015-05-04 15:32:48 +03001230/*[clinic input]
1231_elementtree.Element.findall
1232
1233 path: object
1234 namespaces: object = None
1235
1236[clinic start generated code]*/
1237
1238static PyObject *
1239_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1240 PyObject *namespaces)
1241/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001242{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001243 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001245 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001246 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001247
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001248 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001249 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001250 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001251 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001252 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001253 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 out = PyList_New(0);
1256 if (!out)
1257 return NULL;
1258
1259 if (!self->extra)
1260 return out;
1261
1262 for (i = 0; i < self->extra->length; i++) {
1263 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001264 int rc;
1265 if (!Element_CheckExact(item))
1266 continue;
1267 Py_INCREF(item);
1268 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1269 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1270 Py_DECREF(item);
1271 Py_DECREF(out);
1272 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001273 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001274 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 }
1276
1277 return out;
1278}
1279
Serhiy Storchakacb985562015-05-04 15:32:48 +03001280/*[clinic input]
1281_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001282
Serhiy Storchakacb985562015-05-04 15:32:48 +03001283 path: object
1284 namespaces: object = None
1285
1286[clinic start generated code]*/
1287
1288static PyObject *
1289_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1290 PyObject *namespaces)
1291/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1292{
1293 PyObject* tag = path;
1294 _Py_IDENTIFIER(iterfind);
1295 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001296
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001297 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001298 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001299}
1300
Serhiy Storchakacb985562015-05-04 15:32:48 +03001301/*[clinic input]
1302_elementtree.Element.get
1303
1304 key: object
1305 default: object = None
1306
1307[clinic start generated code]*/
1308
1309static PyObject *
1310_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1311 PyObject *default_value)
1312/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313{
1314 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315
1316 if (!self->extra || self->extra->attrib == Py_None)
1317 value = default_value;
1318 else {
1319 value = PyDict_GetItem(self->extra->attrib, key);
1320 if (!value)
1321 value = default_value;
1322 }
1323
1324 Py_INCREF(value);
1325 return value;
1326}
1327
Serhiy Storchakacb985562015-05-04 15:32:48 +03001328/*[clinic input]
1329_elementtree.Element.getchildren
1330
1331[clinic start generated code]*/
1332
1333static PyObject *
1334_elementtree_Element_getchildren_impl(ElementObject *self)
1335/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001336{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001337 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338 PyObject* list;
1339
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001340 /* FIXME: report as deprecated? */
1341
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001342 if (!self->extra)
1343 return PyList_New(0);
1344
1345 list = PyList_New(self->extra->length);
1346 if (!list)
1347 return NULL;
1348
1349 for (i = 0; i < self->extra->length; i++) {
1350 PyObject* item = self->extra->children[i];
1351 Py_INCREF(item);
1352 PyList_SET_ITEM(list, i, item);
1353 }
1354
1355 return list;
1356}
1357
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001358
Eli Bendersky64d11e62012-06-15 07:42:50 +03001359static PyObject *
1360create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1361
1362
Serhiy Storchakacb985562015-05-04 15:32:48 +03001363/*[clinic input]
1364_elementtree.Element.iter
1365
1366 tag: object = None
1367
1368[clinic start generated code]*/
1369
Eli Bendersky64d11e62012-06-15 07:42:50 +03001370static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001371_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1372/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001373{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001374 if (PyUnicode_Check(tag)) {
1375 if (PyUnicode_READY(tag) < 0)
1376 return NULL;
1377 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1378 tag = Py_None;
1379 }
1380 else if (PyBytes_Check(tag)) {
1381 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1382 tag = Py_None;
1383 }
1384
Eli Bendersky64d11e62012-06-15 07:42:50 +03001385 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001386}
1387
1388
Serhiy Storchakacb985562015-05-04 15:32:48 +03001389/*[clinic input]
1390_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001391
Serhiy Storchakacb985562015-05-04 15:32:48 +03001392[clinic start generated code]*/
1393
1394static PyObject *
1395_elementtree_Element_itertext_impl(ElementObject *self)
1396/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1397{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001398 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001399}
1400
Eli Bendersky64d11e62012-06-15 07:42:50 +03001401
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001402static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001403element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001405 ElementObject* self = (ElementObject*) self_;
1406
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001407 if (!self->extra || index < 0 || index >= self->extra->length) {
1408 PyErr_SetString(
1409 PyExc_IndexError,
1410 "child index out of range"
1411 );
1412 return NULL;
1413 }
1414
1415 Py_INCREF(self->extra->children[index]);
1416 return self->extra->children[index];
1417}
1418
Serhiy Storchakacb985562015-05-04 15:32:48 +03001419/*[clinic input]
1420_elementtree.Element.insert
1421
1422 index: Py_ssize_t
1423 subelement: object(subclass_of='&Element_Type')
1424 /
1425
1426[clinic start generated code]*/
1427
1428static PyObject *
1429_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1430 PyObject *subelement)
1431/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001433 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434
Victor Stinner5f0af232013-07-11 23:01:36 +02001435 if (!self->extra) {
1436 if (create_extra(self, NULL) < 0)
1437 return NULL;
1438 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001440 if (index < 0) {
1441 index += self->extra->length;
1442 if (index < 0)
1443 index = 0;
1444 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001445 if (index > self->extra->length)
1446 index = self->extra->length;
1447
1448 if (element_resize(self, 1) < 0)
1449 return NULL;
1450
1451 for (i = self->extra->length; i > index; i--)
1452 self->extra->children[i] = self->extra->children[i-1];
1453
Serhiy Storchakacb985562015-05-04 15:32:48 +03001454 Py_INCREF(subelement);
1455 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001456
1457 self->extra->length++;
1458
1459 Py_RETURN_NONE;
1460}
1461
Serhiy Storchakacb985562015-05-04 15:32:48 +03001462/*[clinic input]
1463_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001464
Serhiy Storchakacb985562015-05-04 15:32:48 +03001465[clinic start generated code]*/
1466
1467static PyObject *
1468_elementtree_Element_items_impl(ElementObject *self)
1469/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1470{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001471 if (!self->extra || self->extra->attrib == Py_None)
1472 return PyList_New(0);
1473
1474 return PyDict_Items(self->extra->attrib);
1475}
1476
Serhiy Storchakacb985562015-05-04 15:32:48 +03001477/*[clinic input]
1478_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001479
Serhiy Storchakacb985562015-05-04 15:32:48 +03001480[clinic start generated code]*/
1481
1482static PyObject *
1483_elementtree_Element_keys_impl(ElementObject *self)
1484/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1485{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001486 if (!self->extra || self->extra->attrib == Py_None)
1487 return PyList_New(0);
1488
1489 return PyDict_Keys(self->extra->attrib);
1490}
1491
Martin v. Löwis18e16552006-02-15 17:27:45 +00001492static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001493element_length(ElementObject* self)
1494{
1495 if (!self->extra)
1496 return 0;
1497
1498 return self->extra->length;
1499}
1500
Serhiy Storchakacb985562015-05-04 15:32:48 +03001501/*[clinic input]
1502_elementtree.Element.makeelement
1503
1504 tag: object
1505 attrib: object
1506 /
1507
1508[clinic start generated code]*/
1509
1510static PyObject *
1511_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1512 PyObject *attrib)
1513/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001514{
1515 PyObject* elem;
1516
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001517 attrib = PyDict_Copy(attrib);
1518 if (!attrib)
1519 return NULL;
1520
Eli Bendersky092af1f2012-03-04 07:14:03 +02001521 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001522
1523 Py_DECREF(attrib);
1524
1525 return elem;
1526}
1527
Serhiy Storchakacb985562015-05-04 15:32:48 +03001528/*[clinic input]
1529_elementtree.Element.remove
1530
1531 subelement: object(subclass_of='&Element_Type')
1532 /
1533
1534[clinic start generated code]*/
1535
1536static PyObject *
1537_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1538/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001539{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001540 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001541 int rc;
1542 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001543
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001544 if (!self->extra) {
1545 /* element has no children, so raise exception */
1546 PyErr_SetString(
1547 PyExc_ValueError,
1548 "list.remove(x): x not in list"
1549 );
1550 return NULL;
1551 }
1552
1553 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001554 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001555 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001556 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001557 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001558 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001559 if (rc < 0)
1560 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001561 }
1562
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001563 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001564 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001565 PyErr_SetString(
1566 PyExc_ValueError,
1567 "list.remove(x): x not in list"
1568 );
1569 return NULL;
1570 }
1571
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001572 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001573
1574 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575 for (; i < self->extra->length; i++)
1576 self->extra->children[i] = self->extra->children[i+1];
1577
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001578 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001579 Py_RETURN_NONE;
1580}
1581
1582static PyObject*
1583element_repr(ElementObject* self)
1584{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001585 if (self->tag)
1586 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1587 else
1588 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001589}
1590
Serhiy Storchakacb985562015-05-04 15:32:48 +03001591/*[clinic input]
1592_elementtree.Element.set
1593
1594 key: object
1595 value: object
1596 /
1597
1598[clinic start generated code]*/
1599
1600static PyObject *
1601_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1602 PyObject *value)
1603/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001604{
1605 PyObject* attrib;
1606
Victor Stinner5f0af232013-07-11 23:01:36 +02001607 if (!self->extra) {
1608 if (create_extra(self, NULL) < 0)
1609 return NULL;
1610 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001611
1612 attrib = element_get_attrib(self);
1613 if (!attrib)
1614 return NULL;
1615
1616 if (PyDict_SetItem(attrib, key, value) < 0)
1617 return NULL;
1618
1619 Py_RETURN_NONE;
1620}
1621
1622static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001623element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001624{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001625 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001626 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001627 PyObject* old;
1628
1629 if (!self->extra || index < 0 || index >= self->extra->length) {
1630 PyErr_SetString(
1631 PyExc_IndexError,
1632 "child assignment index out of range");
1633 return -1;
1634 }
1635
1636 old = self->extra->children[index];
1637
1638 if (item) {
1639 Py_INCREF(item);
1640 self->extra->children[index] = item;
1641 } else {
1642 self->extra->length--;
1643 for (i = index; i < self->extra->length; i++)
1644 self->extra->children[i] = self->extra->children[i+1];
1645 }
1646
1647 Py_DECREF(old);
1648
1649 return 0;
1650}
1651
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001652static PyObject*
1653element_subscr(PyObject* self_, PyObject* item)
1654{
1655 ElementObject* self = (ElementObject*) self_;
1656
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001657 if (PyIndex_Check(item)) {
1658 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001659
1660 if (i == -1 && PyErr_Occurred()) {
1661 return NULL;
1662 }
1663 if (i < 0 && self->extra)
1664 i += self->extra->length;
1665 return element_getitem(self_, i);
1666 }
1667 else if (PySlice_Check(item)) {
1668 Py_ssize_t start, stop, step, slicelen, cur, i;
1669 PyObject* list;
1670
1671 if (!self->extra)
1672 return PyList_New(0);
1673
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001674 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001675 self->extra->length,
1676 &start, &stop, &step, &slicelen) < 0) {
1677 return NULL;
1678 }
1679
1680 if (slicelen <= 0)
1681 return PyList_New(0);
1682 else {
1683 list = PyList_New(slicelen);
1684 if (!list)
1685 return NULL;
1686
1687 for (cur = start, i = 0; i < slicelen;
1688 cur += step, i++) {
1689 PyObject* item = self->extra->children[cur];
1690 Py_INCREF(item);
1691 PyList_SET_ITEM(list, i, item);
1692 }
1693
1694 return list;
1695 }
1696 }
1697 else {
1698 PyErr_SetString(PyExc_TypeError,
1699 "element indices must be integers");
1700 return NULL;
1701 }
1702}
1703
1704static int
1705element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1706{
1707 ElementObject* self = (ElementObject*) self_;
1708
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001709 if (PyIndex_Check(item)) {
1710 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001711
1712 if (i == -1 && PyErr_Occurred()) {
1713 return -1;
1714 }
1715 if (i < 0 && self->extra)
1716 i += self->extra->length;
1717 return element_setitem(self_, i, value);
1718 }
1719 else if (PySlice_Check(item)) {
1720 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1721
1722 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001723 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001724
Victor Stinner5f0af232013-07-11 23:01:36 +02001725 if (!self->extra) {
1726 if (create_extra(self, NULL) < 0)
1727 return -1;
1728 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001729
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001730 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001731 self->extra->length,
1732 &start, &stop, &step, &slicelen) < 0) {
1733 return -1;
1734 }
1735
Eli Bendersky865756a2012-03-09 13:38:15 +02001736 if (value == NULL) {
1737 /* Delete slice */
1738 size_t cur;
1739 Py_ssize_t i;
1740
1741 if (slicelen <= 0)
1742 return 0;
1743
1744 /* Since we're deleting, the direction of the range doesn't matter,
1745 * so for simplicity make it always ascending.
1746 */
1747 if (step < 0) {
1748 stop = start + 1;
1749 start = stop + step * (slicelen - 1) - 1;
1750 step = -step;
1751 }
1752
1753 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1754
1755 /* recycle is a list that will contain all the children
1756 * scheduled for removal.
1757 */
1758 if (!(recycle = PyList_New(slicelen))) {
1759 PyErr_NoMemory();
1760 return -1;
1761 }
1762
1763 /* This loop walks over all the children that have to be deleted,
1764 * with cur pointing at them. num_moved is the amount of children
1765 * until the next deleted child that have to be "shifted down" to
1766 * occupy the deleted's places.
1767 * Note that in the ith iteration, shifting is done i+i places down
1768 * because i children were already removed.
1769 */
1770 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1771 /* Compute how many children have to be moved, clipping at the
1772 * list end.
1773 */
1774 Py_ssize_t num_moved = step - 1;
1775 if (cur + step >= (size_t)self->extra->length) {
1776 num_moved = self->extra->length - cur - 1;
1777 }
1778
1779 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1780
1781 memmove(
1782 self->extra->children + cur - i,
1783 self->extra->children + cur + 1,
1784 num_moved * sizeof(PyObject *));
1785 }
1786
1787 /* Leftover "tail" after the last removed child */
1788 cur = start + (size_t)slicelen * step;
1789 if (cur < (size_t)self->extra->length) {
1790 memmove(
1791 self->extra->children + cur - slicelen,
1792 self->extra->children + cur,
1793 (self->extra->length - cur) * sizeof(PyObject *));
1794 }
1795
1796 self->extra->length -= slicelen;
1797
1798 /* Discard the recycle list with all the deleted sub-elements */
1799 Py_XDECREF(recycle);
1800 return 0;
1801 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001802
1803 /* A new slice is actually being assigned */
1804 seq = PySequence_Fast(value, "");
1805 if (!seq) {
1806 PyErr_Format(
1807 PyExc_TypeError,
1808 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1809 );
1810 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001811 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001812 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001813
1814 if (step != 1 && newlen != slicelen)
1815 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001816 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001817 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001818 "attempt to assign sequence of size %zd "
1819 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 newlen, slicelen
1821 );
1822 return -1;
1823 }
1824
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001825 /* Resize before creating the recycle bin, to prevent refleaks. */
1826 if (newlen > slicelen) {
1827 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001828 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001829 return -1;
1830 }
1831 }
1832
1833 if (slicelen > 0) {
1834 /* to avoid recursive calls to this method (via decref), move
1835 old items to the recycle bin here, and get rid of them when
1836 we're done modifying the element */
1837 recycle = PyList_New(slicelen);
1838 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001839 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001840 return -1;
1841 }
1842 for (cur = start, i = 0; i < slicelen;
1843 cur += step, i++)
1844 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1845 }
1846
1847 if (newlen < slicelen) {
1848 /* delete slice */
1849 for (i = stop; i < self->extra->length; i++)
1850 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1851 } else if (newlen > slicelen) {
1852 /* insert slice */
1853 for (i = self->extra->length-1; i >= stop; i--)
1854 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1855 }
1856
1857 /* replace the slice */
1858 for (cur = start, i = 0; i < newlen;
1859 cur += step, i++) {
1860 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1861 Py_INCREF(element);
1862 self->extra->children[cur] = element;
1863 }
1864
1865 self->extra->length += newlen - slicelen;
1866
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001867 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001868
1869 /* discard the recycle bin, and everything in it */
1870 Py_XDECREF(recycle);
1871
1872 return 0;
1873 }
1874 else {
1875 PyErr_SetString(PyExc_TypeError,
1876 "element indices must be integers");
1877 return -1;
1878 }
1879}
1880
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001881static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001882element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001883{
1884 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001885 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001886
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001887 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001888 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001889
Alexander Belopolskye239d232010-12-08 23:31:48 +00001890 if (name == NULL)
1891 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001892
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001893 /* handle common attributes first */
1894 if (strcmp(name, "tag") == 0) {
1895 res = self->tag;
1896 Py_INCREF(res);
1897 return res;
1898 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001899 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001900 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001901 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001902 }
1903
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001904 /* methods */
1905 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1906 if (res)
1907 return res;
1908
1909 /* less common attributes */
1910 if (strcmp(name, "tail") == 0) {
1911 PyErr_Clear();
1912 res = element_get_tail(self);
1913 } else if (strcmp(name, "attrib") == 0) {
1914 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001915 if (!self->extra) {
1916 if (create_extra(self, NULL) < 0)
1917 return NULL;
1918 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001919 res = element_get_attrib(self);
1920 }
1921
1922 if (!res)
1923 return NULL;
1924
1925 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001926 return res;
1927}
1928
Eli Benderskyef9683b2013-05-18 07:52:34 -07001929static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001930element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001931{
Eli Benderskyb20df952012-05-20 06:33:29 +03001932 char *name = "";
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001933
1934 if (value == NULL) {
1935 PyErr_SetString(PyExc_AttributeError,
1936 "can't delete attribute");
1937 return -1;
1938 }
Eli Benderskyb20df952012-05-20 06:33:29 +03001939 if (PyUnicode_Check(nameobj))
1940 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001941 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001942 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001943
1944 if (strcmp(name, "tag") == 0) {
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001945 Py_INCREF(value);
1946 Py_SETREF(self->tag, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001947 } else if (strcmp(name, "text") == 0) {
1948 Py_DECREF(JOIN_OBJ(self->text));
1949 self->text = value;
1950 Py_INCREF(self->text);
1951 } else if (strcmp(name, "tail") == 0) {
1952 Py_DECREF(JOIN_OBJ(self->tail));
1953 self->tail = value;
1954 Py_INCREF(self->tail);
1955 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001956 if (!self->extra) {
1957 if (create_extra(self, NULL) < 0)
1958 return -1;
1959 }
Serhiy Storchaka191321d2015-12-27 15:41:34 +02001960 Py_INCREF(value);
1961 Py_SETREF(self->extra->attrib, value);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001963 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001964 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001965 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001966 }
1967
Eli Benderskyef9683b2013-05-18 07:52:34 -07001968 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001969}
1970
1971static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001972 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001973 0, /* sq_concat */
1974 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001975 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001976 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001977 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001978 0,
1979};
1980
Eli Bendersky64d11e62012-06-15 07:42:50 +03001981/******************************* Element iterator ****************************/
1982
1983/* ElementIterObject represents the iteration state over an XML element in
1984 * pre-order traversal. To keep track of which sub-element should be returned
1985 * next, a stack of parents is maintained. This is a standard stack-based
1986 * iterative pre-order traversal of a tree.
1987 * The stack is managed using a single-linked list starting at parent_stack.
1988 * Each stack node contains the saved parent to which we should return after
1989 * the current one is exhausted, and the next child to examine in that parent.
1990 */
1991typedef struct ParentLocator_t {
1992 ElementObject *parent;
1993 Py_ssize_t child_index;
1994 struct ParentLocator_t *next;
1995} ParentLocator;
1996
1997typedef struct {
1998 PyObject_HEAD
1999 ParentLocator *parent_stack;
2000 ElementObject *root_element;
2001 PyObject *sought_tag;
2002 int root_done;
2003 int gettext;
2004} ElementIterObject;
2005
2006
2007static void
2008elementiter_dealloc(ElementIterObject *it)
2009{
2010 ParentLocator *p = it->parent_stack;
2011 while (p) {
2012 ParentLocator *temp = p;
2013 Py_XDECREF(p->parent);
2014 p = p->next;
2015 PyObject_Free(temp);
2016 }
2017
2018 Py_XDECREF(it->sought_tag);
2019 Py_XDECREF(it->root_element);
2020
2021 PyObject_GC_UnTrack(it);
2022 PyObject_GC_Del(it);
2023}
2024
2025static int
2026elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2027{
2028 ParentLocator *p = it->parent_stack;
2029 while (p) {
2030 Py_VISIT(p->parent);
2031 p = p->next;
2032 }
2033
2034 Py_VISIT(it->root_element);
2035 Py_VISIT(it->sought_tag);
2036 return 0;
2037}
2038
2039/* Helper function for elementiter_next. Add a new parent to the parent stack.
2040 */
2041static ParentLocator *
2042parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2043{
2044 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2045 if (new_node) {
2046 new_node->parent = parent;
2047 Py_INCREF(parent);
2048 new_node->child_index = 0;
2049 new_node->next = stack;
2050 }
2051 return new_node;
2052}
2053
2054static PyObject *
2055elementiter_next(ElementIterObject *it)
2056{
2057 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002058 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002059 * A short note on gettext: this function serves both the iter() and
2060 * itertext() methods to avoid code duplication. However, there are a few
2061 * small differences in the way these iterations work. Namely:
2062 * - itertext() only yields text from nodes that have it, and continues
2063 * iterating when a node doesn't have text (so it doesn't return any
2064 * node like iter())
2065 * - itertext() also has to handle tail, after finishing with all the
2066 * children of a node.
2067 */
Eli Bendersky113da642012-06-15 07:52:49 +03002068 ElementObject *cur_parent;
2069 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002070 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002071 ElementObject *elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002072
2073 while (1) {
2074 /* Handle the case reached in the beginning and end of iteration, where
2075 * the parent stack is empty. The root_done flag gives us indication
2076 * whether we've just started iterating (so root_done is 0), in which
2077 * case the root is returned. If root_done is 1 and we're here, the
2078 * iterator is exhausted.
2079 */
2080 if (!it->parent_stack->parent) {
2081 if (it->root_done) {
2082 PyErr_SetNone(PyExc_StopIteration);
2083 return NULL;
2084 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002085 elem = it->root_element;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002086 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002087 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002088 if (!it->parent_stack) {
2089 PyErr_NoMemory();
2090 return NULL;
2091 }
2092
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002093 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002094 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002095 rc = (it->sought_tag == Py_None);
2096 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002097 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002098 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002099 if (rc < 0) {
2100 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002101 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002102 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002103 }
2104 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002105 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002106 PyObject *text = element_get_text(elem);
2107 if (!text) {
2108 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002109 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002110 }
2111 Py_INCREF(text);
2112 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002113 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002114 if (rc > 0)
2115 return text;
2116 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002117 if (rc < 0)
2118 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002119 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002120 return (PyObject *)elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002121 }
2122 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002123 else {
2124 Py_DECREF(elem);
2125 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002126 }
2127 }
2128
2129 /* See if there are children left to traverse in the current parent. If
2130 * yes, visit the next child. If not, pop the stack and try again.
2131 */
Eli Bendersky113da642012-06-15 07:52:49 +03002132 cur_parent = it->parent_stack->parent;
2133 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002134 if (cur_parent->extra && child_index < cur_parent->extra->length) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002135 elem = (ElementObject *)cur_parent->extra->children[child_index];
Eli Bendersky64d11e62012-06-15 07:42:50 +03002136 it->parent_stack->child_index++;
2137 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002138 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002139 if (!it->parent_stack) {
2140 PyErr_NoMemory();
2141 return NULL;
2142 }
2143
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002144 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002145 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002146 PyObject *text = element_get_text(elem);
2147 if (!text) {
2148 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002149 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002150 }
2151 Py_INCREF(text);
2152 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002153 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002154 if (rc > 0)
2155 return text;
2156 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002157 if (rc < 0)
2158 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002159 } else {
2160 rc = (it->sought_tag == Py_None);
2161 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002162 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002163 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002164 if (rc < 0) {
2165 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002166 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002167 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002168 }
2169 if (rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002170 return (PyObject *)elem;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002171 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002172 Py_DECREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002173 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002174 }
2175 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002176 PyObject *tail;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002177 ParentLocator *next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002178 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002179 Py_INCREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002180 tail = element_get_tail(cur_parent);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002181 if (!tail) {
2182 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002183 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002184 }
2185 Py_INCREF(tail);
2186 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002187 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002188 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002189 tail = Py_None;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002190 Py_INCREF(tail);
2191 }
2192 next = it->parent_stack->next;
2193 cur_parent = it->parent_stack->parent;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002194 PyObject_Free(it->parent_stack);
2195 it->parent_stack = next;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002196 Py_XDECREF(cur_parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002197
2198 /* Note that extra condition on it->parent_stack->parent here;
2199 * this is because itertext() is supposed to only return *inner*
2200 * text, not text following the element it began iteration with.
2201 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002202 if (it->parent_stack->parent) {
2203 rc = PyObject_IsTrue(tail);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002204 if (rc > 0)
2205 return tail;
2206 Py_DECREF(tail);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002207 if (rc < 0)
2208 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002209 }
2210 else {
2211 Py_DECREF(tail);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002212 }
2213 }
2214 }
2215
2216 return NULL;
2217}
2218
2219
2220static PyTypeObject ElementIter_Type = {
2221 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002222 /* Using the module's name since the pure-Python implementation does not
2223 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002224 "_elementtree._element_iterator", /* tp_name */
2225 sizeof(ElementIterObject), /* tp_basicsize */
2226 0, /* tp_itemsize */
2227 /* methods */
2228 (destructor)elementiter_dealloc, /* tp_dealloc */
2229 0, /* tp_print */
2230 0, /* tp_getattr */
2231 0, /* tp_setattr */
2232 0, /* tp_reserved */
2233 0, /* tp_repr */
2234 0, /* tp_as_number */
2235 0, /* tp_as_sequence */
2236 0, /* tp_as_mapping */
2237 0, /* tp_hash */
2238 0, /* tp_call */
2239 0, /* tp_str */
2240 0, /* tp_getattro */
2241 0, /* tp_setattro */
2242 0, /* tp_as_buffer */
2243 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2244 0, /* tp_doc */
2245 (traverseproc)elementiter_traverse, /* tp_traverse */
2246 0, /* tp_clear */
2247 0, /* tp_richcompare */
2248 0, /* tp_weaklistoffset */
2249 PyObject_SelfIter, /* tp_iter */
2250 (iternextfunc)elementiter_next, /* tp_iternext */
2251 0, /* tp_methods */
2252 0, /* tp_members */
2253 0, /* tp_getset */
2254 0, /* tp_base */
2255 0, /* tp_dict */
2256 0, /* tp_descr_get */
2257 0, /* tp_descr_set */
2258 0, /* tp_dictoffset */
2259 0, /* tp_init */
2260 0, /* tp_alloc */
2261 0, /* tp_new */
2262};
2263
2264
2265static PyObject *
2266create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2267{
2268 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002269
2270 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2271 if (!it)
2272 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002273
Victor Stinner4d463432013-07-11 23:05:03 +02002274 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002275 it->sought_tag = tag;
2276 it->root_done = 0;
2277 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002278 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002279 it->root_element = self;
2280
Eli Bendersky64d11e62012-06-15 07:42:50 +03002281 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002282
2283 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2284 if (it->parent_stack == NULL) {
2285 Py_DECREF(it);
2286 PyErr_NoMemory();
2287 return NULL;
2288 }
2289 it->parent_stack->parent = NULL;
2290 it->parent_stack->child_index = 0;
2291 it->parent_stack->next = NULL;
2292
Eli Bendersky64d11e62012-06-15 07:42:50 +03002293 return (PyObject *)it;
2294}
2295
2296
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002297/* ==================================================================== */
2298/* the tree builder type */
2299
2300typedef struct {
2301 PyObject_HEAD
2302
Eli Bendersky58d548d2012-05-29 15:45:16 +03002303 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002304
Antoine Pitrouee329312012-10-04 19:53:29 +02002305 PyObject *this; /* current node */
2306 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002307
Eli Bendersky58d548d2012-05-29 15:45:16 +03002308 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309
Eli Bendersky58d548d2012-05-29 15:45:16 +03002310 PyObject *stack; /* element stack */
2311 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002312
Eli Bendersky48d358b2012-05-30 17:57:50 +03002313 PyObject *element_factory;
2314
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002315 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 PyObject *events; /* list of events, or NULL if not collecting */
2317 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2318 PyObject *end_event_obj;
2319 PyObject *start_ns_event_obj;
2320 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002321} TreeBuilderObject;
2322
Christian Heimes90aa7642007-12-19 02:45:37 +00002323#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002324
2325/* -------------------------------------------------------------------- */
2326/* constructor and destructor */
2327
Eli Bendersky58d548d2012-05-29 15:45:16 +03002328static PyObject *
2329treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002330{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002331 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2332 if (t != NULL) {
2333 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334
Eli Bendersky58d548d2012-05-29 15:45:16 +03002335 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002336 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002337 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002338 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002339
Eli Bendersky58d548d2012-05-29 15:45:16 +03002340 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002341 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002342 t->stack = PyList_New(20);
2343 if (!t->stack) {
2344 Py_DECREF(t->this);
2345 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002346 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002347 return NULL;
2348 }
2349 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002350
Eli Bendersky58d548d2012-05-29 15:45:16 +03002351 t->events = NULL;
2352 t->start_event_obj = t->end_event_obj = NULL;
2353 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2354 }
2355 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356}
2357
Serhiy Storchakacb985562015-05-04 15:32:48 +03002358/*[clinic input]
2359_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002360
Serhiy Storchakacb985562015-05-04 15:32:48 +03002361 element_factory: object = NULL
2362
2363[clinic start generated code]*/
2364
2365static int
2366_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2367 PyObject *element_factory)
2368/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2369{
2370 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002371
2372 if (element_factory) {
2373 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002374 tmp = self->element_factory;
2375 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002376 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002377 }
2378
Eli Bendersky58d548d2012-05-29 15:45:16 +03002379 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002380}
2381
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382static int
2383treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2384{
2385 Py_VISIT(self->root);
2386 Py_VISIT(self->this);
2387 Py_VISIT(self->last);
2388 Py_VISIT(self->data);
2389 Py_VISIT(self->stack);
2390 Py_VISIT(self->element_factory);
2391 return 0;
2392}
2393
2394static int
2395treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002396{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002397 Py_CLEAR(self->end_ns_event_obj);
2398 Py_CLEAR(self->start_ns_event_obj);
2399 Py_CLEAR(self->end_event_obj);
2400 Py_CLEAR(self->start_event_obj);
2401 Py_CLEAR(self->events);
2402 Py_CLEAR(self->stack);
2403 Py_CLEAR(self->data);
2404 Py_CLEAR(self->last);
2405 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002406 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002407 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002408 return 0;
2409}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002410
Eli Bendersky48d358b2012-05-30 17:57:50 +03002411static void
2412treebuilder_dealloc(TreeBuilderObject *self)
2413{
2414 PyObject_GC_UnTrack(self);
2415 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002416 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002417}
2418
2419/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002420/* helpers for handling of arbitrary element-like objects */
2421
2422static int
2423treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2424 PyObject **dest, _Py_Identifier *name)
2425{
2426 if (Element_CheckExact(element)) {
2427 Py_DECREF(JOIN_OBJ(*dest));
2428 *dest = JOIN_SET(data, PyList_CheckExact(data));
2429 return 0;
2430 }
2431 else {
2432 PyObject *joined = list_join(data);
2433 int r;
2434 if (joined == NULL)
2435 return -1;
2436 r = _PyObject_SetAttrId(element, name, joined);
2437 Py_DECREF(joined);
2438 return r;
2439 }
2440}
2441
2442/* These two functions steal a reference to data */
2443static int
2444treebuilder_set_element_text(PyObject *element, PyObject *data)
2445{
2446 _Py_IDENTIFIER(text);
2447 return treebuilder_set_element_text_or_tail(
2448 element, data, &((ElementObject *) element)->text, &PyId_text);
2449}
2450
2451static int
2452treebuilder_set_element_tail(PyObject *element, PyObject *data)
2453{
2454 _Py_IDENTIFIER(tail);
2455 return treebuilder_set_element_text_or_tail(
2456 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2457}
2458
2459static int
2460treebuilder_add_subelement(PyObject *element, PyObject *child)
2461{
2462 _Py_IDENTIFIER(append);
2463 if (Element_CheckExact(element)) {
2464 ElementObject *elem = (ElementObject *) element;
2465 return element_add_subelement(elem, child);
2466 }
2467 else {
2468 PyObject *res;
2469 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2470 if (res == NULL)
2471 return -1;
2472 Py_DECREF(res);
2473 return 0;
2474 }
2475}
2476
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002477LOCAL(int)
2478treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2479 PyObject *node)
2480{
2481 if (action != NULL) {
2482 PyObject *res = PyTuple_Pack(2, action, node);
2483 if (res == NULL)
2484 return -1;
2485 if (PyList_Append(self->events, res) < 0) {
2486 Py_DECREF(res);
2487 return -1;
2488 }
2489 Py_DECREF(res);
2490 }
2491 return 0;
2492}
2493
Antoine Pitrouee329312012-10-04 19:53:29 +02002494/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002495/* handlers */
2496
2497LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002498treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2499 PyObject* attrib)
2500{
2501 PyObject* node;
2502 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002503 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002504
2505 if (self->data) {
2506 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002507 if (treebuilder_set_element_text(self->last, self->data))
2508 return NULL;
2509 }
2510 else {
2511 if (treebuilder_set_element_tail(self->last, self->data))
2512 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002513 }
2514 self->data = NULL;
2515 }
2516
Eli Bendersky08231a92013-05-18 15:47:16 -07002517 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002518 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2519 } else {
2520 node = create_new_element(tag, attrib);
2521 }
2522 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002523 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002524 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002525
Antoine Pitrouee329312012-10-04 19:53:29 +02002526 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527
2528 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002529 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002530 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531 } else {
2532 if (self->root) {
2533 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002534 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 "multiple elements on top level"
2536 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002537 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538 }
2539 Py_INCREF(node);
2540 self->root = node;
2541 }
2542
2543 if (self->index < PyList_GET_SIZE(self->stack)) {
2544 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002545 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002546 Py_INCREF(this);
2547 } else {
2548 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002549 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 }
2551 self->index++;
2552
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 Py_INCREF(node);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002554 Py_SETREF(self->this, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 Py_INCREF(node);
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002556 Py_SETREF(self->last, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002557
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002558 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2559 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560
2561 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002562
2563 error:
2564 Py_DECREF(node);
2565 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566}
2567
2568LOCAL(PyObject*)
2569treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2570{
2571 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002572 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002573 /* ignore calls to data before the first call to start */
2574 Py_RETURN_NONE;
2575 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 /* store the first item as is */
2577 Py_INCREF(data); self->data = data;
2578 } else {
2579 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002580 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2581 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002582 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583 /* expat often generates single character data sections; handle
2584 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002585 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2586 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002587 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002588 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589 } else if (PyList_CheckExact(self->data)) {
2590 if (PyList_Append(self->data, data) < 0)
2591 return NULL;
2592 } else {
2593 PyObject* list = PyList_New(2);
2594 if (!list)
2595 return NULL;
2596 PyList_SET_ITEM(list, 0, self->data);
2597 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2598 self->data = list;
2599 }
2600 }
2601
2602 Py_RETURN_NONE;
2603}
2604
2605LOCAL(PyObject*)
2606treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2607{
2608 PyObject* item;
2609
2610 if (self->data) {
2611 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002612 if (treebuilder_set_element_text(self->last, self->data))
2613 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002614 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002615 if (treebuilder_set_element_tail(self->last, self->data))
2616 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 }
2618 self->data = NULL;
2619 }
2620
2621 if (self->index == 0) {
2622 PyErr_SetString(
2623 PyExc_IndexError,
2624 "pop from empty stack"
2625 );
2626 return NULL;
2627 }
2628
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002629 item = self->last;
Antoine Pitrouee329312012-10-04 19:53:29 +02002630 self->last = self->this;
Serhiy Storchaka191321d2015-12-27 15:41:34 +02002631 self->index--;
2632 self->this = PyList_GET_ITEM(self->stack, self->index);
2633 Py_INCREF(self->this);
2634 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002635
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002636 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2637 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638
2639 Py_INCREF(self->last);
2640 return (PyObject*) self->last;
2641}
2642
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643/* -------------------------------------------------------------------- */
2644/* methods (in alphabetical order) */
2645
Serhiy Storchakacb985562015-05-04 15:32:48 +03002646/*[clinic input]
2647_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648
Serhiy Storchakacb985562015-05-04 15:32:48 +03002649 data: object
2650 /
2651
2652[clinic start generated code]*/
2653
2654static PyObject *
2655_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2656/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2657{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658 return treebuilder_handle_data(self, data);
2659}
2660
Serhiy Storchakacb985562015-05-04 15:32:48 +03002661/*[clinic input]
2662_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663
Serhiy Storchakacb985562015-05-04 15:32:48 +03002664 tag: object
2665 /
2666
2667[clinic start generated code]*/
2668
2669static PyObject *
2670_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2671/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2672{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673 return treebuilder_handle_end(self, tag);
2674}
2675
2676LOCAL(PyObject*)
2677treebuilder_done(TreeBuilderObject* self)
2678{
2679 PyObject* res;
2680
2681 /* FIXME: check stack size? */
2682
2683 if (self->root)
2684 res = self->root;
2685 else
2686 res = Py_None;
2687
2688 Py_INCREF(res);
2689 return res;
2690}
2691
Serhiy Storchakacb985562015-05-04 15:32:48 +03002692/*[clinic input]
2693_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002694
Serhiy Storchakacb985562015-05-04 15:32:48 +03002695[clinic start generated code]*/
2696
2697static PyObject *
2698_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2699/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2700{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002701 return treebuilder_done(self);
2702}
2703
Serhiy Storchakacb985562015-05-04 15:32:48 +03002704/*[clinic input]
2705_elementtree.TreeBuilder.start
2706
2707 tag: object
2708 attrs: object = None
2709 /
2710
2711[clinic start generated code]*/
2712
2713static PyObject *
2714_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2715 PyObject *attrs)
2716/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002717{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002718 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002719}
2720
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721/* ==================================================================== */
2722/* the expat interface */
2723
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002724#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002725#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002726
2727/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2728 * cached globally without being in per-module state.
2729 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002730static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002732
Eli Bendersky52467b12012-06-01 07:13:08 +03002733static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2734 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2735
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736typedef struct {
2737 PyObject_HEAD
2738
2739 XML_Parser parser;
2740
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002741 PyObject *target;
2742 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002744 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002745
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002746 PyObject *handle_start;
2747 PyObject *handle_data;
2748 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002749
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002750 PyObject *handle_comment;
2751 PyObject *handle_pi;
2752 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002754 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002755
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756} XMLParserObject;
2757
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002758static PyObject*
2759_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2760static PyObject *
2761_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2762 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002763
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764/* helpers */
2765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766LOCAL(PyObject*)
2767makeuniversal(XMLParserObject* self, const char* string)
2768{
2769 /* convert a UTF-8 tag/attribute name from the expat parser
2770 to a universal name string */
2771
Antoine Pitrouc1948842012-10-01 23:40:37 +02002772 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773 PyObject* key;
2774 PyObject* value;
2775
2776 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002777 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002778 if (!key)
2779 return NULL;
2780
2781 value = PyDict_GetItem(self->names, key);
2782
2783 if (value) {
2784 Py_INCREF(value);
2785 } else {
2786 /* new name. convert to universal name, and decode as
2787 necessary */
2788
2789 PyObject* tag;
2790 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002791 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002792
2793 /* look for namespace separator */
2794 for (i = 0; i < size; i++)
2795 if (string[i] == '}')
2796 break;
2797 if (i != size) {
2798 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002799 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002800 if (tag == NULL) {
2801 Py_DECREF(key);
2802 return NULL;
2803 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002804 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805 p[0] = '{';
2806 memcpy(p+1, string, size);
2807 size++;
2808 } else {
2809 /* plain name; use key as tag */
2810 Py_INCREF(key);
2811 tag = key;
2812 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002813
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002814 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002815 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002816 value = PyUnicode_DecodeUTF8(p, size, "strict");
2817 Py_DECREF(tag);
2818 if (!value) {
2819 Py_DECREF(key);
2820 return NULL;
2821 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002822
2823 /* add to names dictionary */
2824 if (PyDict_SetItem(self->names, key, value) < 0) {
2825 Py_DECREF(key);
2826 Py_DECREF(value);
2827 return NULL;
2828 }
2829 }
2830
2831 Py_DECREF(key);
2832 return value;
2833}
2834
Eli Bendersky5b77d812012-03-16 08:20:05 +02002835/* Set the ParseError exception with the given parameters.
2836 * If message is not NULL, it's used as the error string. Otherwise, the
2837 * message string is the default for the given error_code.
2838*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002839static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002840expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2841 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002842{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002843 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002844 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002845
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002846 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002847 message ? message : EXPAT(ErrorString)(error_code),
2848 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002849 if (errmsg == NULL)
2850 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002851
Eli Bendersky532d03e2013-08-10 08:00:39 -07002852 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002853 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002854 if (!error)
2855 return;
2856
Eli Bendersky5b77d812012-03-16 08:20:05 +02002857 /* Add code and position attributes */
2858 code = PyLong_FromLong((long)error_code);
2859 if (!code) {
2860 Py_DECREF(error);
2861 return;
2862 }
2863 if (PyObject_SetAttrString(error, "code", code) == -1) {
2864 Py_DECREF(error);
2865 Py_DECREF(code);
2866 return;
2867 }
2868 Py_DECREF(code);
2869
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002870 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002871 if (!position) {
2872 Py_DECREF(error);
2873 return;
2874 }
2875 if (PyObject_SetAttrString(error, "position", position) == -1) {
2876 Py_DECREF(error);
2877 Py_DECREF(position);
2878 return;
2879 }
2880 Py_DECREF(position);
2881
Eli Bendersky532d03e2013-08-10 08:00:39 -07002882 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002883 Py_DECREF(error);
2884}
2885
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002886/* -------------------------------------------------------------------- */
2887/* handlers */
2888
2889static void
2890expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2891 int data_len)
2892{
2893 PyObject* key;
2894 PyObject* value;
2895 PyObject* res;
2896
2897 if (data_len < 2 || data_in[0] != '&')
2898 return;
2899
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002900 if (PyErr_Occurred())
2901 return;
2902
Neal Norwitz0269b912007-08-08 06:56:02 +00002903 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002904 if (!key)
2905 return;
2906
2907 value = PyDict_GetItem(self->entity, key);
2908
2909 if (value) {
2910 if (TreeBuilder_CheckExact(self->target))
2911 res = treebuilder_handle_data(
2912 (TreeBuilderObject*) self->target, value
2913 );
2914 else if (self->handle_data)
2915 res = PyObject_CallFunction(self->handle_data, "O", value);
2916 else
2917 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002918 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002919 } else if (!PyErr_Occurred()) {
2920 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002921 char message[128] = "undefined entity ";
2922 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002923 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002924 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002925 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002926 EXPAT(GetErrorColumnNumber)(self->parser),
2927 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928 );
2929 }
2930
2931 Py_DECREF(key);
2932}
2933
2934static void
2935expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2936 const XML_Char **attrib_in)
2937{
2938 PyObject* res;
2939 PyObject* tag;
2940 PyObject* attrib;
2941 int ok;
2942
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002943 if (PyErr_Occurred())
2944 return;
2945
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 /* tag name */
2947 tag = makeuniversal(self, tag_in);
2948 if (!tag)
2949 return; /* parser will look for errors */
2950
2951 /* attributes */
2952 if (attrib_in[0]) {
2953 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002954 if (!attrib) {
2955 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002957 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002958 while (attrib_in[0] && attrib_in[1]) {
2959 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002960 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002961 if (!key || !value) {
2962 Py_XDECREF(value);
2963 Py_XDECREF(key);
2964 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002965 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 return;
2967 }
2968 ok = PyDict_SetItem(attrib, key, value);
2969 Py_DECREF(value);
2970 Py_DECREF(key);
2971 if (ok < 0) {
2972 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002973 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 return;
2975 }
2976 attrib_in += 2;
2977 }
2978 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002979 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002980 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002981 if (!attrib) {
2982 Py_DECREF(tag);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002983 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002984 }
Eli Bendersky48d358b2012-05-30 17:57:50 +03002985 }
2986
2987 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 /* shortcut */
2989 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2990 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002991 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002992 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002994 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 res = NULL;
2996
2997 Py_DECREF(tag);
2998 Py_DECREF(attrib);
2999
3000 Py_XDECREF(res);
3001}
3002
3003static void
3004expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3005 int data_len)
3006{
3007 PyObject* data;
3008 PyObject* res;
3009
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003010 if (PyErr_Occurred())
3011 return;
3012
Neal Norwitz0269b912007-08-08 06:56:02 +00003013 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003014 if (!data)
3015 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016
3017 if (TreeBuilder_CheckExact(self->target))
3018 /* shortcut */
3019 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3020 else if (self->handle_data)
3021 res = PyObject_CallFunction(self->handle_data, "O", data);
3022 else
3023 res = NULL;
3024
3025 Py_DECREF(data);
3026
3027 Py_XDECREF(res);
3028}
3029
3030static void
3031expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3032{
3033 PyObject* tag;
3034 PyObject* res = NULL;
3035
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003036 if (PyErr_Occurred())
3037 return;
3038
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 if (TreeBuilder_CheckExact(self->target))
3040 /* shortcut */
3041 /* the standard tree builder doesn't look at the end tag */
3042 res = treebuilder_handle_end(
3043 (TreeBuilderObject*) self->target, Py_None
3044 );
3045 else if (self->handle_end) {
3046 tag = makeuniversal(self, tag_in);
3047 if (tag) {
3048 res = PyObject_CallFunction(self->handle_end, "O", tag);
3049 Py_DECREF(tag);
3050 }
3051 }
3052
3053 Py_XDECREF(res);
3054}
3055
3056static void
3057expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3058 const XML_Char *uri)
3059{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003060 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3061 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003062
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003063 if (PyErr_Occurred())
3064 return;
3065
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003066 if (!target->events || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003067 return;
3068
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003069 if (!uri)
3070 uri = "";
3071 if (!prefix)
3072 prefix = "";
3073
3074 parcel = Py_BuildValue("ss", prefix, uri);
3075 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003076 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003077 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3078 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079}
3080
3081static void
3082expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3083{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003084 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3085
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003086 if (PyErr_Occurred())
3087 return;
3088
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003089 if (!target->events)
3090 return;
3091
3092 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093}
3094
3095static void
3096expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3097{
3098 PyObject* comment;
3099 PyObject* res;
3100
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003101 if (PyErr_Occurred())
3102 return;
3103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003105 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106 if (comment) {
3107 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3108 Py_XDECREF(res);
3109 Py_DECREF(comment);
3110 }
3111 }
3112}
3113
Eli Bendersky45839902013-01-13 05:14:47 -08003114static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003115expat_start_doctype_handler(XMLParserObject *self,
3116 const XML_Char *doctype_name,
3117 const XML_Char *sysid,
3118 const XML_Char *pubid,
3119 int has_internal_subset)
3120{
3121 PyObject *self_pyobj = (PyObject *)self;
3122 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3123 PyObject *parser_doctype = NULL;
3124 PyObject *res = NULL;
3125
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003126 if (PyErr_Occurred())
3127 return;
3128
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003129 doctype_name_obj = makeuniversal(self, doctype_name);
3130 if (!doctype_name_obj)
3131 return;
3132
3133 if (sysid) {
3134 sysid_obj = makeuniversal(self, sysid);
3135 if (!sysid_obj) {
3136 Py_DECREF(doctype_name_obj);
3137 return;
3138 }
3139 } else {
3140 Py_INCREF(Py_None);
3141 sysid_obj = Py_None;
3142 }
3143
3144 if (pubid) {
3145 pubid_obj = makeuniversal(self, pubid);
3146 if (!pubid_obj) {
3147 Py_DECREF(doctype_name_obj);
3148 Py_DECREF(sysid_obj);
3149 return;
3150 }
3151 } else {
3152 Py_INCREF(Py_None);
3153 pubid_obj = Py_None;
3154 }
3155
3156 /* If the target has a handler for doctype, call it. */
3157 if (self->handle_doctype) {
3158 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3159 doctype_name_obj, pubid_obj, sysid_obj);
3160 Py_CLEAR(res);
3161 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003162 else {
3163 /* Now see if the parser itself has a doctype method. If yes and it's
3164 * a custom method, call it but warn about deprecation. If it's only
3165 * the vanilla XMLParser method, do nothing.
3166 */
3167 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3168 if (parser_doctype &&
3169 !(PyCFunction_Check(parser_doctype) &&
3170 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3171 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003172 (PyCFunction) _elementtree_XMLParser_doctype)) {
3173 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3174 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003175 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003176 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003177 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003178 res = PyObject_CallFunction(parser_doctype, "OOO",
3179 doctype_name_obj, pubid_obj, sysid_obj);
3180 Py_CLEAR(res);
3181 }
3182 }
3183
3184clear:
3185 Py_XDECREF(parser_doctype);
3186 Py_DECREF(doctype_name_obj);
3187 Py_DECREF(pubid_obj);
3188 Py_DECREF(sysid_obj);
3189}
3190
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003191static void
3192expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3193 const XML_Char* data_in)
3194{
3195 PyObject* target;
3196 PyObject* data;
3197 PyObject* res;
3198
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003199 if (PyErr_Occurred())
3200 return;
3201
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003202 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003203 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3204 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003205 if (target && data) {
3206 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3207 Py_XDECREF(res);
3208 Py_DECREF(data);
3209 Py_DECREF(target);
3210 } else {
3211 Py_XDECREF(data);
3212 Py_XDECREF(target);
3213 }
3214 }
3215}
3216
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218
Eli Bendersky52467b12012-06-01 07:13:08 +03003219static PyObject *
3220xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003221{
Eli Bendersky52467b12012-06-01 07:13:08 +03003222 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3223 if (self) {
3224 self->parser = NULL;
3225 self->target = self->entity = self->names = NULL;
3226 self->handle_start = self->handle_data = self->handle_end = NULL;
3227 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003228 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003230 return (PyObject *)self;
3231}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232
Serhiy Storchakacb985562015-05-04 15:32:48 +03003233/*[clinic input]
3234_elementtree.XMLParser.__init__
3235
3236 html: object = NULL
3237 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003238 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003239
3240[clinic start generated code]*/
3241
Eli Bendersky52467b12012-06-01 07:13:08 +03003242static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003243_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3244 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003245/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003246{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003247 self->entity = PyDict_New();
3248 if (!self->entity)
3249 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003250
Serhiy Storchakacb985562015-05-04 15:32:48 +03003251 self->names = PyDict_New();
3252 if (!self->names) {
3253 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003254 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003256
Serhiy Storchakacb985562015-05-04 15:32:48 +03003257 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3258 if (!self->parser) {
3259 Py_CLEAR(self->entity);
3260 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003262 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 }
3264
Eli Bendersky52467b12012-06-01 07:13:08 +03003265 if (target) {
3266 Py_INCREF(target);
3267 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003268 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003269 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270 Py_CLEAR(self->entity);
3271 Py_CLEAR(self->names);
3272 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003273 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003274 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003276 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277
Serhiy Storchakacb985562015-05-04 15:32:48 +03003278 self->handle_start = PyObject_GetAttrString(target, "start");
3279 self->handle_data = PyObject_GetAttrString(target, "data");
3280 self->handle_end = PyObject_GetAttrString(target, "end");
3281 self->handle_comment = PyObject_GetAttrString(target, "comment");
3282 self->handle_pi = PyObject_GetAttrString(target, "pi");
3283 self->handle_close = PyObject_GetAttrString(target, "close");
3284 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003285
3286 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003289 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003291 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003292 (XML_StartElementHandler) expat_start_handler,
3293 (XML_EndElementHandler) expat_end_handler
3294 );
3295 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003296 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003297 (XML_DefaultHandler) expat_default_handler
3298 );
3299 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003300 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 (XML_CharacterDataHandler) expat_data_handler
3302 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003303 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003305 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003306 (XML_CommentHandler) expat_comment_handler
3307 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003308 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003310 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 (XML_ProcessingInstructionHandler) expat_pi_handler
3312 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003313 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003314 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003315 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3316 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003319 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003320 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321
Eli Bendersky52467b12012-06-01 07:13:08 +03003322 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003323}
3324
Eli Bendersky52467b12012-06-01 07:13:08 +03003325static int
3326xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3327{
3328 Py_VISIT(self->handle_close);
3329 Py_VISIT(self->handle_pi);
3330 Py_VISIT(self->handle_comment);
3331 Py_VISIT(self->handle_end);
3332 Py_VISIT(self->handle_data);
3333 Py_VISIT(self->handle_start);
3334
3335 Py_VISIT(self->target);
3336 Py_VISIT(self->entity);
3337 Py_VISIT(self->names);
3338
3339 return 0;
3340}
3341
3342static int
3343xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344{
3345 EXPAT(ParserFree)(self->parser);
3346
Antoine Pitrouc1948842012-10-01 23:40:37 +02003347 Py_CLEAR(self->handle_close);
3348 Py_CLEAR(self->handle_pi);
3349 Py_CLEAR(self->handle_comment);
3350 Py_CLEAR(self->handle_end);
3351 Py_CLEAR(self->handle_data);
3352 Py_CLEAR(self->handle_start);
3353 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354
Antoine Pitrouc1948842012-10-01 23:40:37 +02003355 Py_CLEAR(self->target);
3356 Py_CLEAR(self->entity);
3357 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358
Eli Bendersky52467b12012-06-01 07:13:08 +03003359 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003360}
3361
Eli Bendersky52467b12012-06-01 07:13:08 +03003362static void
3363xmlparser_dealloc(XMLParserObject* self)
3364{
3365 PyObject_GC_UnTrack(self);
3366 xmlparser_gc_clear(self);
3367 Py_TYPE(self)->tp_free((PyObject *)self);
3368}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003369
3370LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003371expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003372{
3373 int ok;
3374
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003375 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003376 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3377
3378 if (PyErr_Occurred())
3379 return NULL;
3380
3381 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003382 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003383 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003384 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003385 EXPAT(GetErrorColumnNumber)(self->parser),
3386 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387 );
3388 return NULL;
3389 }
3390
3391 Py_RETURN_NONE;
3392}
3393
Serhiy Storchakacb985562015-05-04 15:32:48 +03003394/*[clinic input]
3395_elementtree.XMLParser.close
3396
3397[clinic start generated code]*/
3398
3399static PyObject *
3400_elementtree_XMLParser_close_impl(XMLParserObject *self)
3401/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003402{
3403 /* end feeding data to parser */
3404
3405 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003406 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003407 if (!res)
3408 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003409
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003410 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 Py_DECREF(res);
3412 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003413 }
3414 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003415 Py_DECREF(res);
3416 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003417 }
3418 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003419 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003420 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421}
3422
Serhiy Storchakacb985562015-05-04 15:32:48 +03003423/*[clinic input]
3424_elementtree.XMLParser.feed
3425
3426 data: object
3427 /
3428
3429[clinic start generated code]*/
3430
3431static PyObject *
3432_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3433/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003434{
3435 /* feed data to parser */
3436
Serhiy Storchakacb985562015-05-04 15:32:48 +03003437 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003438 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003439 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3440 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003441 return NULL;
3442 if (data_len > INT_MAX) {
3443 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3444 return NULL;
3445 }
3446 /* Explicitly set UTF-8 encoding. Return code ignored. */
3447 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003448 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003449 }
3450 else {
3451 Py_buffer view;
3452 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003453 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003454 return NULL;
3455 if (view.len > INT_MAX) {
3456 PyBuffer_Release(&view);
3457 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3458 return NULL;
3459 }
3460 res = expat_parse(self, view.buf, (int)view.len, 0);
3461 PyBuffer_Release(&view);
3462 return res;
3463 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003464}
3465
Serhiy Storchakacb985562015-05-04 15:32:48 +03003466/*[clinic input]
3467_elementtree.XMLParser._parse_whole
3468
3469 file: object
3470 /
3471
3472[clinic start generated code]*/
3473
3474static PyObject *
3475_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3476/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003477{
Eli Benderskya3699232013-05-19 18:47:23 -07003478 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 PyObject* reader;
3480 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003481 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 PyObject* res;
3483
Serhiy Storchakacb985562015-05-04 15:32:48 +03003484 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003485 if (!reader)
3486 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003487
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003488 /* read from open file object */
3489 for (;;) {
3490
3491 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3492
3493 if (!buffer) {
3494 /* read failed (e.g. due to KeyboardInterrupt) */
3495 Py_DECREF(reader);
3496 return NULL;
3497 }
3498
Eli Benderskyf996e772012-03-16 05:53:30 +02003499 if (PyUnicode_CheckExact(buffer)) {
3500 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003501 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003502 Py_DECREF(buffer);
3503 break;
3504 }
3505 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003506 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003507 if (!temp) {
3508 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003509 Py_DECREF(reader);
3510 return NULL;
3511 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003512 buffer = temp;
3513 }
3514 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515 Py_DECREF(buffer);
3516 break;
3517 }
3518
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003519 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3520 Py_DECREF(buffer);
3521 Py_DECREF(reader);
3522 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3523 return NULL;
3524 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003525 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003526 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003527 );
3528
3529 Py_DECREF(buffer);
3530
3531 if (!res) {
3532 Py_DECREF(reader);
3533 return NULL;
3534 }
3535 Py_DECREF(res);
3536
3537 }
3538
3539 Py_DECREF(reader);
3540
3541 res = expat_parse(self, "", 0, 1);
3542
3543 if (res && TreeBuilder_CheckExact(self->target)) {
3544 Py_DECREF(res);
3545 return treebuilder_done((TreeBuilderObject*) self->target);
3546 }
3547
3548 return res;
3549}
3550
Serhiy Storchakacb985562015-05-04 15:32:48 +03003551/*[clinic input]
3552_elementtree.XMLParser.doctype
3553
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003554 name: object
3555 pubid: object
3556 system: object
3557 /
3558
Serhiy Storchakacb985562015-05-04 15:32:48 +03003559[clinic start generated code]*/
3560
3561static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003562_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3563 PyObject *pubid, PyObject *system)
3564/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003565{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003566 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3567 "This method of XMLParser is deprecated. Define"
3568 " doctype() method on the TreeBuilder target.",
3569 1) < 0) {
3570 return NULL;
3571 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003572 Py_RETURN_NONE;
3573}
3574
Serhiy Storchakacb985562015-05-04 15:32:48 +03003575/*[clinic input]
3576_elementtree.XMLParser._setevents
3577
3578 events_queue: object(subclass_of='&PyList_Type')
3579 events_to_report: object = None
3580 /
3581
3582[clinic start generated code]*/
3583
3584static PyObject *
3585_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3586 PyObject *events_queue,
3587 PyObject *events_to_report)
3588/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589{
3590 /* activate element event reporting */
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003591 Py_ssize_t i;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003592 TreeBuilderObject *target;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003593 PyObject *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003594
3595 if (!TreeBuilder_CheckExact(self->target)) {
3596 PyErr_SetString(
3597 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003598 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599 "targets"
3600 );
3601 return NULL;
3602 }
3603
3604 target = (TreeBuilderObject*) self->target;
3605
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003606 Py_INCREF(events_queue);
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003607 Py_SETREF(target->events, events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003608
3609 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003610 Py_CLEAR(target->start_event_obj);
3611 Py_CLEAR(target->end_event_obj);
3612 Py_CLEAR(target->start_ns_event_obj);
3613 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003614
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003615 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003617 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003618 Py_RETURN_NONE;
3619 }
3620
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003621 if (!(events_seq = PySequence_Fast(events_to_report,
3622 "events must be a sequence"))) {
3623 return NULL;
3624 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003625
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003626 for (i = 0; i < PySequence_Size(events_seq); ++i) {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003627 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3628 char *event_name = NULL;
3629 if (PyUnicode_Check(event_name_obj)) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003630 event_name = PyUnicode_AsUTF8(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003631 } else if (PyBytes_Check(event_name_obj)) {
3632 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003633 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003634 if (event_name == NULL) {
3635 Py_DECREF(events_seq);
3636 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3637 return NULL;
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003638 }
3639
3640 Py_INCREF(event_name_obj);
3641 if (strcmp(event_name, "start") == 0) {
3642 Py_SETREF(target->start_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003643 } else if (strcmp(event_name, "end") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003644 Py_SETREF(target->end_event_obj, event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003645 } else if (strcmp(event_name, "start-ns") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003646 Py_SETREF(target->start_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003647 EXPAT(SetNamespaceDeclHandler)(
3648 self->parser,
3649 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3650 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3651 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003652 } else if (strcmp(event_name, "end-ns") == 0) {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003653 Py_SETREF(target->end_ns_event_obj, event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003654 EXPAT(SetNamespaceDeclHandler)(
3655 self->parser,
3656 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3657 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3658 );
3659 } else {
Serhiy Storchakabc4ded952015-12-24 11:51:57 +02003660 Py_DECREF(event_name_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003661 Py_DECREF(events_seq);
3662 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003663 return NULL;
3664 }
3665 }
3666
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003667 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003668 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669}
3670
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003671static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003672xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003674 if (PyUnicode_Check(nameobj)) {
3675 PyObject* res;
3676 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3677 res = self->entity;
3678 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3679 res = self->target;
3680 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3681 return PyUnicode_FromFormat(
3682 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003683 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003684 }
3685 else
3686 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003687
Alexander Belopolskye239d232010-12-08 23:31:48 +00003688 Py_INCREF(res);
3689 return res;
3690 }
3691 generic:
3692 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003693}
3694
Serhiy Storchakacb985562015-05-04 15:32:48 +03003695#include "clinic/_elementtree.c.h"
3696
3697static PyMethodDef element_methods[] = {
3698
3699 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3700
3701 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3702 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3703
3704 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3705 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3706 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3707
3708 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3709 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3710 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3711 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3712
3713 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3714 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3715 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3716
3717 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3718 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3719
3720 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3721 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3722
3723 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3724
3725 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3726 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3727 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3728 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3729 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3730
3731 {NULL, NULL}
3732};
3733
3734static PyMappingMethods element_as_mapping = {
3735 (lenfunc) element_length,
3736 (binaryfunc) element_subscr,
3737 (objobjargproc) element_ass_subscr,
3738};
3739
3740static PyTypeObject Element_Type = {
3741 PyVarObject_HEAD_INIT(NULL, 0)
3742 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3743 /* methods */
3744 (destructor)element_dealloc, /* tp_dealloc */
3745 0, /* tp_print */
3746 0, /* tp_getattr */
3747 0, /* tp_setattr */
3748 0, /* tp_reserved */
3749 (reprfunc)element_repr, /* tp_repr */
3750 0, /* tp_as_number */
3751 &element_as_sequence, /* tp_as_sequence */
3752 &element_as_mapping, /* tp_as_mapping */
3753 0, /* tp_hash */
3754 0, /* tp_call */
3755 0, /* tp_str */
3756 (getattrofunc)element_getattro, /* tp_getattro */
3757 (setattrofunc)element_setattro, /* tp_setattro */
3758 0, /* tp_as_buffer */
3759 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3760 /* tp_flags */
3761 0, /* tp_doc */
3762 (traverseproc)element_gc_traverse, /* tp_traverse */
3763 (inquiry)element_gc_clear, /* tp_clear */
3764 0, /* tp_richcompare */
3765 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3766 0, /* tp_iter */
3767 0, /* tp_iternext */
3768 element_methods, /* tp_methods */
3769 0, /* tp_members */
3770 0, /* tp_getset */
3771 0, /* tp_base */
3772 0, /* tp_dict */
3773 0, /* tp_descr_get */
3774 0, /* tp_descr_set */
3775 0, /* tp_dictoffset */
3776 (initproc)element_init, /* tp_init */
3777 PyType_GenericAlloc, /* tp_alloc */
3778 element_new, /* tp_new */
3779 0, /* tp_free */
3780};
3781
3782static PyMethodDef treebuilder_methods[] = {
3783 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3784 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3785 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3786 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3787 {NULL, NULL}
3788};
3789
3790static PyTypeObject TreeBuilder_Type = {
3791 PyVarObject_HEAD_INIT(NULL, 0)
3792 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3793 /* methods */
3794 (destructor)treebuilder_dealloc, /* tp_dealloc */
3795 0, /* tp_print */
3796 0, /* tp_getattr */
3797 0, /* tp_setattr */
3798 0, /* tp_reserved */
3799 0, /* tp_repr */
3800 0, /* tp_as_number */
3801 0, /* tp_as_sequence */
3802 0, /* tp_as_mapping */
3803 0, /* tp_hash */
3804 0, /* tp_call */
3805 0, /* tp_str */
3806 0, /* tp_getattro */
3807 0, /* tp_setattro */
3808 0, /* tp_as_buffer */
3809 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3810 /* tp_flags */
3811 0, /* tp_doc */
3812 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3813 (inquiry)treebuilder_gc_clear, /* tp_clear */
3814 0, /* tp_richcompare */
3815 0, /* tp_weaklistoffset */
3816 0, /* tp_iter */
3817 0, /* tp_iternext */
3818 treebuilder_methods, /* tp_methods */
3819 0, /* tp_members */
3820 0, /* tp_getset */
3821 0, /* tp_base */
3822 0, /* tp_dict */
3823 0, /* tp_descr_get */
3824 0, /* tp_descr_set */
3825 0, /* tp_dictoffset */
3826 _elementtree_TreeBuilder___init__, /* tp_init */
3827 PyType_GenericAlloc, /* tp_alloc */
3828 treebuilder_new, /* tp_new */
3829 0, /* tp_free */
3830};
3831
3832static PyMethodDef xmlparser_methods[] = {
3833 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3834 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3835 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3836 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3837 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3838 {NULL, NULL}
3839};
3840
Neal Norwitz227b5332006-03-22 09:28:35 +00003841static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003842 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003843 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003844 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003845 (destructor)xmlparser_dealloc, /* tp_dealloc */
3846 0, /* tp_print */
3847 0, /* tp_getattr */
3848 0, /* tp_setattr */
3849 0, /* tp_reserved */
3850 0, /* tp_repr */
3851 0, /* tp_as_number */
3852 0, /* tp_as_sequence */
3853 0, /* tp_as_mapping */
3854 0, /* tp_hash */
3855 0, /* tp_call */
3856 0, /* tp_str */
3857 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3858 0, /* tp_setattro */
3859 0, /* tp_as_buffer */
3860 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3861 /* tp_flags */
3862 0, /* tp_doc */
3863 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3864 (inquiry)xmlparser_gc_clear, /* tp_clear */
3865 0, /* tp_richcompare */
3866 0, /* tp_weaklistoffset */
3867 0, /* tp_iter */
3868 0, /* tp_iternext */
3869 xmlparser_methods, /* tp_methods */
3870 0, /* tp_members */
3871 0, /* tp_getset */
3872 0, /* tp_base */
3873 0, /* tp_dict */
3874 0, /* tp_descr_get */
3875 0, /* tp_descr_set */
3876 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003877 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003878 PyType_GenericAlloc, /* tp_alloc */
3879 xmlparser_new, /* tp_new */
3880 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003881};
3882
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003883/* ==================================================================== */
3884/* python module interface */
3885
3886static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003887 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003888 {NULL, NULL}
3889};
3890
Martin v. Löwis1a214512008-06-11 05:26:20 +00003891
Eli Bendersky532d03e2013-08-10 08:00:39 -07003892static struct PyModuleDef elementtreemodule = {
3893 PyModuleDef_HEAD_INIT,
3894 "_elementtree",
3895 NULL,
3896 sizeof(elementtreestate),
3897 _functions,
3898 NULL,
3899 elementtree_traverse,
3900 elementtree_clear,
3901 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003902};
3903
Neal Norwitzf6657e62006-12-28 04:47:50 +00003904PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003905PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003906{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003907 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003908 elementtreestate *st;
3909
3910 m = PyState_FindModule(&elementtreemodule);
3911 if (m) {
3912 Py_INCREF(m);
3913 return m;
3914 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003915
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003916 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003917 if (PyType_Ready(&ElementIter_Type) < 0)
3918 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003919 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003920 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003921 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003922 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003923 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003924 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003925
Eli Bendersky532d03e2013-08-10 08:00:39 -07003926 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003927 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003928 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003929 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003930
Eli Bendersky828efde2012-04-05 05:40:58 +03003931 if (!(temp = PyImport_ImportModule("copy")))
3932 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003933 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003934 Py_XDECREF(temp);
3935
Eli Bendersky532d03e2013-08-10 08:00:39 -07003936 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003937 return NULL;
3938
Eli Bendersky20d41742012-06-01 09:48:37 +03003939 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003940 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3941 if (expat_capi) {
3942 /* check that it's usable */
3943 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003944 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003945 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3946 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003947 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003948 PyErr_SetString(PyExc_ImportError,
3949 "pyexpat version is incompatible");
3950 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003951 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003952 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003953 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003954 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003955
Eli Bendersky532d03e2013-08-10 08:00:39 -07003956 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003957 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003958 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003959 Py_INCREF(st->parseerror_obj);
3960 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003961
Eli Bendersky092af1f2012-03-04 07:14:03 +02003962 Py_INCREF((PyObject *)&Element_Type);
3963 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3964
Eli Bendersky58d548d2012-05-29 15:45:16 +03003965 Py_INCREF((PyObject *)&TreeBuilder_Type);
3966 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3967
Eli Bendersky52467b12012-06-01 07:13:08 +03003968 Py_INCREF((PyObject *)&XMLParser_Type);
3969 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003970
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003971 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003972}