blob: 8eb655c1f4df2950727d2af542f6457bf8ea2928 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000022/* An element can hold this many children without extra memory
23 allocations. */
24#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
38#if 0
39static int memory = 0;
40#define ALLOC(size, comment)\
41do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
42#define RELEASE(size, comment)\
43do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
44#else
45#define ALLOC(size, comment)
46#define RELEASE(size, comment)
47#endif
48
49/* compiler tweaks */
50#if defined(_MSC_VER)
51#define LOCAL(type) static __inline type __fastcall
52#else
53#define LOCAL(type) static type
54#endif
55
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000056/* macros used to store 'join' flags in string object pointers. note
57 that all use of text and tail as object pointers must be wrapped in
58 JOIN_OBJ. see comments in the ElementObject definition for more
59 info. */
60#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
61#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Antoine Pitrouca8aa4a2012-09-20 20:56:47 +020062#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
Christian Heimes90aa7642007-12-19 02:45:37 +0000225#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700641 _clear_joined_ptr(&self->text);
642 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643
644 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300645 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 return 0;
649}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655
656 if (self->weakreflist != NULL)
657 PyObject_ClearWeakRefs((PyObject *) self);
658
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 /* element_gc_clear clears all references and deallocates extra
660 */
661 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
663 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200664 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665}
666
667/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669/*[clinic input]
670_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
Serhiy Storchakacb985562015-05-04 15:32:48 +0300672 subelement: object(subclass_of='&Element_Type')
673 /
674
675[clinic start generated code]*/
676
677static PyObject *
678_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
679/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
680{
681 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 return NULL;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element_clear_impl(ElementObject *self)
694/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
695{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300696 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_INCREF(Py_None);
699 Py_DECREF(JOIN_OBJ(self->text));
700 self->text = Py_None;
701
702 Py_INCREF(Py_None);
703 Py_DECREF(JOIN_OBJ(self->tail));
704 self->tail = Py_None;
705
706 Py_RETURN_NONE;
707}
708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709/*[clinic input]
710_elementtree.Element.__copy__
711
712[clinic start generated code]*/
713
714static PyObject *
715_elementtree_Element___copy___impl(ElementObject *self)
716/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200718 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 ElementObject* element;
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800722 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (!element)
724 return NULL;
725
726 Py_DECREF(JOIN_OBJ(element->text));
727 element->text = self->text;
728 Py_INCREF(JOIN_OBJ(element->text));
729
730 Py_DECREF(JOIN_OBJ(element->tail));
731 element->tail = self->tail;
732 Py_INCREF(JOIN_OBJ(element->tail));
733
734 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000735 if (element_resize(element, self->extra->length) < 0) {
736 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000738 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
740 for (i = 0; i < self->extra->length; i++) {
741 Py_INCREF(self->extra->children[i]);
742 element->extra->children[i] = self->extra->children[i];
743 }
744
745 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 }
747
748 return (PyObject*) element;
749}
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
754 memo: object
755 /
756
757[clinic start generated code]*/
758
759static PyObject *
760_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 Py_DECREF(element->text);
798 element->text = JOIN_SET(text, JOIN_GET(self->text));
799
800 tail = deepcopy(JOIN_OBJ(self->tail), memo);
801 if (!tail)
802 goto error;
803 Py_DECREF(element->tail);
804 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
805
806 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 if (element_resize(element, self->extra->length) < 0)
808 goto error;
809
810 for (i = 0; i < self->extra->length; i++) {
811 PyObject* child = deepcopy(self->extra->children[i], memo);
812 if (!child) {
813 element->extra->length = i;
814 goto error;
815 }
816 element->extra->children[i] = child;
817 }
818
819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200823 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchakacb985562015-05-04 15:32:48 +0300841/*[clinic input]
842_elementtree.Element.__sizeof__ -> Py_ssize_t
843
844[clinic start generated code]*/
845
846static Py_ssize_t
847_elementtree_Element___sizeof___impl(ElementObject *self)
848/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200849{
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +0200850 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
Martin v. Löwisbce16662012-06-17 10:41:22 +0200851 if (self->extra) {
852 result += sizeof(ElementObjectExtra);
853 if (self->extra->children != self->extra->_children)
854 result += sizeof(PyObject*) * self->extra->allocated;
855 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300856 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200857}
858
/* Keys of the state dictionary shared by __getstate__ and __setstate__. */
#define PICKLED_TAG      "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TAIL     "tail"
#define PICKLED_TEXT     "text"
865
866/* __getstate__ returns a fabricated instance dict as in the pure-Python
867 * Element implementation, for interoperability/interchangeability. This
868 * makes the pure-Python implementation details an API, but (a) there aren't
869 * any unnecessary structures there; and (b) it buys compatibility with 3.2
870 * pickles. See issue #16076.
871 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300872/*[clinic input]
873_elementtree.Element.__getstate__
874
875[clinic start generated code]*/
876
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300878_elementtree_Element___getstate___impl(ElementObject *self)
879/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200881 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 PyObject *instancedict = NULL, *children;
883
884 /* Build a list of children. */
885 children = PyList_New(self->extra ? self->extra->length : 0);
886 if (!children)
887 return NULL;
888 for (i = 0; i < PyList_GET_SIZE(children); i++) {
889 PyObject *child = self->extra->children[i];
890 Py_INCREF(child);
891 PyList_SET_ITEM(children, i, child);
892 }
893
894 /* Construct the state object. */
895 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
896 if (noattrib)
897 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
898 PICKLED_TAG, self->tag,
899 PICKLED_CHILDREN, children,
900 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 PICKLED_TEXT, JOIN_OBJ(self->text),
902 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903 else
904 instancedict = Py_BuildValue("{sOsOsOsOsO}",
905 PICKLED_TAG, self->tag,
906 PICKLED_CHILDREN, children,
907 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 PICKLED_TEXT, JOIN_OBJ(self->text),
909 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800910 if (instancedict) {
911 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800913 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 else {
915 for (i = 0; i < PyList_GET_SIZE(children); i++)
916 Py_DECREF(PyList_GET_ITEM(children, i));
917 Py_DECREF(children);
918
919 return NULL;
920 }
921}
922
923static PyObject *
924element_setstate_from_attributes(ElementObject *self,
925 PyObject *tag,
926 PyObject *attrib,
927 PyObject *text,
928 PyObject *tail,
929 PyObject *children)
930{
931 Py_ssize_t i, nchildren;
932
933 if (!tag) {
934 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
935 return NULL;
936 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937
938 Py_CLEAR(self->tag);
939 self->tag = tag;
940 Py_INCREF(self->tag);
941
Eli Benderskydd3661e2013-09-13 06:24:25 -0700942 _clear_joined_ptr(&self->text);
943 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
944 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945
Eli Benderskydd3661e2013-09-13 06:24:25 -0700946 _clear_joined_ptr(&self->tail);
947 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
948 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949
950 /* Handle ATTRIB and CHILDREN. */
951 if (!children && !attrib)
952 Py_RETURN_NONE;
953
954 /* Compute 'nchildren'. */
955 if (children) {
956 if (!PyList_Check(children)) {
957 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
958 return NULL;
959 }
960 nchildren = PyList_Size(children);
961 }
962 else {
963 nchildren = 0;
964 }
965
966 /* Allocate 'extra'. */
967 if (element_resize(self, nchildren)) {
968 return NULL;
969 }
970 assert(self->extra && self->extra->allocated >= nchildren);
971
972 /* Copy children */
973 for (i = 0; i < nchildren; i++) {
974 self->extra->children[i] = PyList_GET_ITEM(children, i);
975 Py_INCREF(self->extra->children[i]);
976 }
977
978 self->extra->length = nchildren;
979 self->extra->allocated = nchildren;
980
981 /* Stash attrib. */
982 if (attrib) {
983 Py_CLEAR(self->extra->attrib);
984 self->extra->attrib = attrib;
985 Py_INCREF(attrib);
986 }
987
988 Py_RETURN_NONE;
989}
990
991/* __setstate__ for Element instance from the Python implementation.
992 * 'state' should be the instance dict.
993 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300994
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995static PyObject *
996element_setstate_from_Python(ElementObject *self, PyObject *state)
997{
998 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
999 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1000 PyObject *args;
1001 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001002 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004 tag = attrib = text = tail = children = NULL;
1005 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001008
1009 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1010 &attrib, &text, &tail, &children))
1011 retval = element_setstate_from_attributes(self, tag, attrib, text,
1012 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001014 retval = NULL;
1015
1016 Py_DECREF(args);
1017 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001018}
1019
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020/*[clinic input]
1021_elementtree.Element.__setstate__
1022
1023 state: object
1024 /
1025
1026[clinic start generated code]*/
1027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001029_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1030/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031{
1032 if (!PyDict_CheckExact(state)) {
1033 PyErr_Format(PyExc_TypeError,
1034 "Don't know how to unpickle \"%.200R\" as an Element",
1035 state);
1036 return NULL;
1037 }
1038 else
1039 return element_setstate_from_Python(self, state);
1040}
1041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001042LOCAL(int)
1043checkpath(PyObject* tag)
1044{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001045 Py_ssize_t i;
1046 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047
1048 /* check if a tag contains an xpath character */
1049
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050#define PATHCHAR(ch) \
1051 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1055 void *data = PyUnicode_DATA(tag);
1056 unsigned int kind = PyUnicode_KIND(tag);
1057 for (i = 0; i < len; i++) {
1058 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1059 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 return 1;
1065 }
1066 return 0;
1067 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001068 if (PyBytes_Check(tag)) {
1069 char *p = PyBytes_AS_STRING(tag);
1070 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071 if (p[i] == '{')
1072 check = 0;
1073 else if (p[i] == '}')
1074 check = 1;
1075 else if (check && PATHCHAR(p[i]))
1076 return 1;
1077 }
1078 return 0;
1079 }
1080
1081 return 1; /* unknown type; might be path expression */
1082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.extend
1086
1087 elements: object
1088 /
1089
1090[clinic start generated code]*/
1091
1092static PyObject *
1093_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1094/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095{
1096 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001097 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098
Serhiy Storchakacb985562015-05-04 15:32:48 +03001099 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001100 if (!seq) {
1101 PyErr_Format(
1102 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001104 );
1105 return NULL;
1106 }
1107
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001108 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001109 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001110 Py_INCREF(element);
1111 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001112 PyErr_Format(
1113 PyExc_TypeError,
1114 "expected an Element, not \"%.200s\"",
1115 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001116 Py_DECREF(seq);
1117 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001118 return NULL;
1119 }
1120
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (element_add_subelement(self, element) < 0) {
1122 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 return NULL;
1125 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001126 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 }
1128
1129 Py_DECREF(seq);
1130
1131 Py_RETURN_NONE;
1132}
1133
Serhiy Storchakacb985562015-05-04 15:32:48 +03001134/*[clinic input]
1135_elementtree.Element.find
1136
1137 path: object
1138 namespaces: object = None
1139
1140[clinic start generated code]*/
1141
1142static PyObject *
1143_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1144 PyObject *namespaces)
1145/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001147 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001148 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001149
Serhiy Storchakacb985562015-05-04 15:32:48 +03001150 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001151 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001153 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001155 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156
1157 if (!self->extra)
1158 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 for (i = 0; i < self->extra->length; i++) {
1161 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001162 int rc;
1163 if (!Element_CheckExact(item))
1164 continue;
1165 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001166 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 Py_DECREF(item);
1170 if (rc < 0)
1171 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 }
1173
1174 Py_RETURN_NONE;
1175}
1176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177/*[clinic input]
1178_elementtree.Element.findtext
1179
1180 path: object
1181 default: object = None
1182 namespaces: object = None
1183
1184[clinic start generated code]*/
1185
1186static PyObject *
1187_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1188 PyObject *default_value,
1189 PyObject *namespaces)
1190/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001192 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001193 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001194 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001197 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199 );
1200
1201 if (!self->extra) {
1202 Py_INCREF(default_value);
1203 return default_value;
1204 }
1205
1206 for (i = 0; i < self->extra->length; i++) {
1207 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001208 int rc;
1209 if (!Element_CheckExact(item))
1210 continue;
1211 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001212 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001213 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 if (text == Py_None) {
1216 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001217 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001219 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001220 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 return text;
1222 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 Py_DECREF(item);
1224 if (rc < 0)
1225 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 }
1227
1228 Py_INCREF(default_value);
1229 return default_value;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.findall
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001247 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001248 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001249
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001250 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001251 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001252 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001253 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001255 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256
1257 out = PyList_New(0);
1258 if (!out)
1259 return NULL;
1260
1261 if (!self->extra)
1262 return out;
1263
1264 for (i = 0; i < self->extra->length; i++) {
1265 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 int rc;
1267 if (!Element_CheckExact(item))
1268 continue;
1269 Py_INCREF(item);
1270 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1271 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1272 Py_DECREF(item);
1273 Py_DECREF(out);
1274 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001276 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 }
1278
1279 return out;
1280}
1281
Serhiy Storchakacb985562015-05-04 15:32:48 +03001282/*[clinic input]
1283_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001284
Serhiy Storchakacb985562015-05-04 15:32:48 +03001285 path: object
1286 namespaces: object = None
1287
1288[clinic start generated code]*/
1289
1290static PyObject *
1291_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1292 PyObject *namespaces)
1293/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1294{
1295 PyObject* tag = path;
1296 _Py_IDENTIFIER(iterfind);
1297 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001298
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001299 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001300 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001301}
1302
Serhiy Storchakacb985562015-05-04 15:32:48 +03001303/*[clinic input]
1304_elementtree.Element.get
1305
1306 key: object
1307 default: object = None
1308
1309[clinic start generated code]*/
1310
1311static PyObject *
1312_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1313 PyObject *default_value)
1314/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315{
1316 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317
1318 if (!self->extra || self->extra->attrib == Py_None)
1319 value = default_value;
1320 else {
1321 value = PyDict_GetItem(self->extra->attrib, key);
1322 if (!value)
1323 value = default_value;
1324 }
1325
1326 Py_INCREF(value);
1327 return value;
1328}
1329
Serhiy Storchakacb985562015-05-04 15:32:48 +03001330/*[clinic input]
1331_elementtree.Element.getchildren
1332
1333[clinic start generated code]*/
1334
1335static PyObject *
1336_elementtree_Element_getchildren_impl(ElementObject *self)
1337/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001339 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340 PyObject* list;
1341
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001342 /* FIXME: report as deprecated? */
1343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344 if (!self->extra)
1345 return PyList_New(0);
1346
1347 list = PyList_New(self->extra->length);
1348 if (!list)
1349 return NULL;
1350
1351 for (i = 0; i < self->extra->length; i++) {
1352 PyObject* item = self->extra->children[i];
1353 Py_INCREF(item);
1354 PyList_SET_ITEM(list, i, item);
1355 }
1356
1357 return list;
1358}
1359
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001360
Eli Bendersky64d11e62012-06-15 07:42:50 +03001361static PyObject *
1362create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1363
1364
Serhiy Storchakacb985562015-05-04 15:32:48 +03001365/*[clinic input]
1366_elementtree.Element.iter
1367
1368 tag: object = None
1369
1370[clinic start generated code]*/
1371
Eli Bendersky64d11e62012-06-15 07:42:50 +03001372static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001373_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1374/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001375{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001376 if (PyUnicode_Check(tag)) {
1377 if (PyUnicode_READY(tag) < 0)
1378 return NULL;
1379 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1380 tag = Py_None;
1381 }
1382 else if (PyBytes_Check(tag)) {
1383 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1384 tag = Py_None;
1385 }
1386
Eli Bendersky64d11e62012-06-15 07:42:50 +03001387 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001388}
1389
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Serhiy Storchakacb985562015-05-04 15:32:48 +03001394[clinic start generated code]*/
1395
1396static PyObject *
1397_elementtree_Element_itertext_impl(ElementObject *self)
1398/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1399{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001400 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401}
1402
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001405element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001406{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001407 ElementObject* self = (ElementObject*) self_;
1408
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409 if (!self->extra || index < 0 || index >= self->extra->length) {
1410 PyErr_SetString(
1411 PyExc_IndexError,
1412 "child index out of range"
1413 );
1414 return NULL;
1415 }
1416
1417 Py_INCREF(self->extra->children[index]);
1418 return self->extra->children[index];
1419}
1420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421/*[clinic input]
1422_elementtree.Element.insert
1423
1424 index: Py_ssize_t
1425 subelement: object(subclass_of='&Element_Type')
1426 /
1427
1428[clinic start generated code]*/
1429
1430static PyObject *
1431_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1432 PyObject *subelement)
1433/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001435 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001436
Victor Stinner5f0af232013-07-11 23:01:36 +02001437 if (!self->extra) {
1438 if (create_extra(self, NULL) < 0)
1439 return NULL;
1440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001442 if (index < 0) {
1443 index += self->extra->length;
1444 if (index < 0)
1445 index = 0;
1446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001447 if (index > self->extra->length)
1448 index = self->extra->length;
1449
1450 if (element_resize(self, 1) < 0)
1451 return NULL;
1452
1453 for (i = self->extra->length; i > index; i--)
1454 self->extra->children[i] = self->extra->children[i-1];
1455
Serhiy Storchakacb985562015-05-04 15:32:48 +03001456 Py_INCREF(subelement);
1457 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458
1459 self->extra->length++;
1460
1461 Py_RETURN_NONE;
1462}
1463
Serhiy Storchakacb985562015-05-04 15:32:48 +03001464/*[clinic input]
1465_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466
Serhiy Storchakacb985562015-05-04 15:32:48 +03001467[clinic start generated code]*/
1468
1469static PyObject *
1470_elementtree_Element_items_impl(ElementObject *self)
1471/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1472{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001473 if (!self->extra || self->extra->attrib == Py_None)
1474 return PyList_New(0);
1475
1476 return PyDict_Items(self->extra->attrib);
1477}
1478
Serhiy Storchakacb985562015-05-04 15:32:48 +03001479/*[clinic input]
1480_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482[clinic start generated code]*/
1483
1484static PyObject *
1485_elementtree_Element_keys_impl(ElementObject *self)
1486/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1487{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488 if (!self->extra || self->extra->attrib == Py_None)
1489 return PyList_New(0);
1490
1491 return PyDict_Keys(self->extra->attrib);
1492}
1493
Martin v. Löwis18e16552006-02-15 17:27:45 +00001494static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495element_length(ElementObject* self)
1496{
1497 if (!self->extra)
1498 return 0;
1499
1500 return self->extra->length;
1501}
1502
Serhiy Storchakacb985562015-05-04 15:32:48 +03001503/*[clinic input]
1504_elementtree.Element.makeelement
1505
1506 tag: object
1507 attrib: object
1508 /
1509
1510[clinic start generated code]*/
1511
1512static PyObject *
1513_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1514 PyObject *attrib)
1515/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516{
1517 PyObject* elem;
1518
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519 attrib = PyDict_Copy(attrib);
1520 if (!attrib)
1521 return NULL;
1522
Eli Bendersky092af1f2012-03-04 07:14:03 +02001523 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524
1525 Py_DECREF(attrib);
1526
1527 return elem;
1528}
1529
Serhiy Storchakacb985562015-05-04 15:32:48 +03001530/*[clinic input]
1531_elementtree.Element.remove
1532
1533 subelement: object(subclass_of='&Element_Type')
1534 /
1535
1536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1540/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001542 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001543 int rc;
1544 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546 if (!self->extra) {
1547 /* element has no children, so raise exception */
1548 PyErr_SetString(
1549 PyExc_ValueError,
1550 "list.remove(x): x not in list"
1551 );
1552 return NULL;
1553 }
1554
1555 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001558 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001559 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001560 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001561 if (rc < 0)
1562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 }
1564
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001565 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001566 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 PyErr_SetString(
1568 PyExc_ValueError,
1569 "list.remove(x): x not in list"
1570 );
1571 return NULL;
1572 }
1573
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001574 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575
1576 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577 for (; i < self->extra->length; i++)
1578 self->extra->children[i] = self->extra->children[i+1];
1579
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001580 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001581 Py_RETURN_NONE;
1582}
1583
1584static PyObject*
1585element_repr(ElementObject* self)
1586{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001587 if (self->tag)
1588 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1589 else
1590 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591}
1592
Serhiy Storchakacb985562015-05-04 15:32:48 +03001593/*[clinic input]
1594_elementtree.Element.set
1595
1596 key: object
1597 value: object
1598 /
1599
1600[clinic start generated code]*/
1601
1602static PyObject *
1603_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1604 PyObject *value)
1605/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606{
1607 PyObject* attrib;
1608
Victor Stinner5f0af232013-07-11 23:01:36 +02001609 if (!self->extra) {
1610 if (create_extra(self, NULL) < 0)
1611 return NULL;
1612 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613
1614 attrib = element_get_attrib(self);
1615 if (!attrib)
1616 return NULL;
1617
1618 if (PyDict_SetItem(attrib, key, value) < 0)
1619 return NULL;
1620
1621 Py_RETURN_NONE;
1622}
1623
1624static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001625element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001627 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001628 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629 PyObject* old;
1630
1631 if (!self->extra || index < 0 || index >= self->extra->length) {
1632 PyErr_SetString(
1633 PyExc_IndexError,
1634 "child assignment index out of range");
1635 return -1;
1636 }
1637
1638 old = self->extra->children[index];
1639
1640 if (item) {
1641 Py_INCREF(item);
1642 self->extra->children[index] = item;
1643 } else {
1644 self->extra->length--;
1645 for (i = index; i < self->extra->length; i++)
1646 self->extra->children[i] = self->extra->children[i+1];
1647 }
1648
1649 Py_DECREF(old);
1650
1651 return 0;
1652}
1653
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001654static PyObject*
1655element_subscr(PyObject* self_, PyObject* item)
1656{
1657 ElementObject* self = (ElementObject*) self_;
1658
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001659 if (PyIndex_Check(item)) {
1660 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001661
1662 if (i == -1 && PyErr_Occurred()) {
1663 return NULL;
1664 }
1665 if (i < 0 && self->extra)
1666 i += self->extra->length;
1667 return element_getitem(self_, i);
1668 }
1669 else if (PySlice_Check(item)) {
1670 Py_ssize_t start, stop, step, slicelen, cur, i;
1671 PyObject* list;
1672
1673 if (!self->extra)
1674 return PyList_New(0);
1675
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001676 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001677 self->extra->length,
1678 &start, &stop, &step, &slicelen) < 0) {
1679 return NULL;
1680 }
1681
1682 if (slicelen <= 0)
1683 return PyList_New(0);
1684 else {
1685 list = PyList_New(slicelen);
1686 if (!list)
1687 return NULL;
1688
1689 for (cur = start, i = 0; i < slicelen;
1690 cur += step, i++) {
1691 PyObject* item = self->extra->children[cur];
1692 Py_INCREF(item);
1693 PyList_SET_ITEM(list, i, item);
1694 }
1695
1696 return list;
1697 }
1698 }
1699 else {
1700 PyErr_SetString(PyExc_TypeError,
1701 "element indices must be integers");
1702 return NULL;
1703 }
1704}
1705
1706static int
1707element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1708{
1709 ElementObject* self = (ElementObject*) self_;
1710
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001711 if (PyIndex_Check(item)) {
1712 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001713
1714 if (i == -1 && PyErr_Occurred()) {
1715 return -1;
1716 }
1717 if (i < 0 && self->extra)
1718 i += self->extra->length;
1719 return element_setitem(self_, i, value);
1720 }
1721 else if (PySlice_Check(item)) {
1722 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1723
1724 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001725 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726
Victor Stinner5f0af232013-07-11 23:01:36 +02001727 if (!self->extra) {
1728 if (create_extra(self, NULL) < 0)
1729 return -1;
1730 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001731
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001732 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001733 self->extra->length,
1734 &start, &stop, &step, &slicelen) < 0) {
1735 return -1;
1736 }
1737
Eli Bendersky865756a2012-03-09 13:38:15 +02001738 if (value == NULL) {
1739 /* Delete slice */
1740 size_t cur;
1741 Py_ssize_t i;
1742
1743 if (slicelen <= 0)
1744 return 0;
1745
1746 /* Since we're deleting, the direction of the range doesn't matter,
1747 * so for simplicity make it always ascending.
1748 */
1749 if (step < 0) {
1750 stop = start + 1;
1751 start = stop + step * (slicelen - 1) - 1;
1752 step = -step;
1753 }
1754
1755 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1756
1757 /* recycle is a list that will contain all the children
1758 * scheduled for removal.
1759 */
1760 if (!(recycle = PyList_New(slicelen))) {
1761 PyErr_NoMemory();
1762 return -1;
1763 }
1764
1765 /* This loop walks over all the children that have to be deleted,
1766 * with cur pointing at them. num_moved is the amount of children
1767 * until the next deleted child that have to be "shifted down" to
1768 * occupy the deleted's places.
1769 * Note that in the ith iteration, shifting is done i+i places down
1770 * because i children were already removed.
1771 */
1772 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1773 /* Compute how many children have to be moved, clipping at the
1774 * list end.
1775 */
1776 Py_ssize_t num_moved = step - 1;
1777 if (cur + step >= (size_t)self->extra->length) {
1778 num_moved = self->extra->length - cur - 1;
1779 }
1780
1781 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1782
1783 memmove(
1784 self->extra->children + cur - i,
1785 self->extra->children + cur + 1,
1786 num_moved * sizeof(PyObject *));
1787 }
1788
1789 /* Leftover "tail" after the last removed child */
1790 cur = start + (size_t)slicelen * step;
1791 if (cur < (size_t)self->extra->length) {
1792 memmove(
1793 self->extra->children + cur - slicelen,
1794 self->extra->children + cur,
1795 (self->extra->length - cur) * sizeof(PyObject *));
1796 }
1797
1798 self->extra->length -= slicelen;
1799
1800 /* Discard the recycle list with all the deleted sub-elements */
1801 Py_XDECREF(recycle);
1802 return 0;
1803 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001804
1805 /* A new slice is actually being assigned */
1806 seq = PySequence_Fast(value, "");
1807 if (!seq) {
1808 PyErr_Format(
1809 PyExc_TypeError,
1810 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1811 );
1812 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001813 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001814 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001815
1816 if (step != 1 && newlen != slicelen)
1817 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001818 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001819 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 "attempt to assign sequence of size %zd "
1821 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001822 newlen, slicelen
1823 );
1824 return -1;
1825 }
1826
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001827 /* Resize before creating the recycle bin, to prevent refleaks. */
1828 if (newlen > slicelen) {
1829 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001830 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001831 return -1;
1832 }
1833 }
1834
1835 if (slicelen > 0) {
1836 /* to avoid recursive calls to this method (via decref), move
1837 old items to the recycle bin here, and get rid of them when
1838 we're done modifying the element */
1839 recycle = PyList_New(slicelen);
1840 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001841 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001842 return -1;
1843 }
1844 for (cur = start, i = 0; i < slicelen;
1845 cur += step, i++)
1846 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1847 }
1848
1849 if (newlen < slicelen) {
1850 /* delete slice */
1851 for (i = stop; i < self->extra->length; i++)
1852 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1853 } else if (newlen > slicelen) {
1854 /* insert slice */
1855 for (i = self->extra->length-1; i >= stop; i--)
1856 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1857 }
1858
1859 /* replace the slice */
1860 for (cur = start, i = 0; i < newlen;
1861 cur += step, i++) {
1862 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1863 Py_INCREF(element);
1864 self->extra->children[cur] = element;
1865 }
1866
1867 self->extra->length += newlen - slicelen;
1868
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870
1871 /* discard the recycle bin, and everything in it */
1872 Py_XDECREF(recycle);
1873
1874 return 0;
1875 }
1876 else {
1877 PyErr_SetString(PyExc_TypeError,
1878 "element indices must be integers");
1879 return -1;
1880 }
1881}
1882
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001884element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885{
1886 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001887 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001888
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001889 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001890 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001891
Alexander Belopolskye239d232010-12-08 23:31:48 +00001892 if (name == NULL)
1893 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001894
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001895 /* handle common attributes first */
1896 if (strcmp(name, "tag") == 0) {
1897 res = self->tag;
1898 Py_INCREF(res);
1899 return res;
1900 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001902 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001903 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001904 }
1905
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001906 /* methods */
1907 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1908 if (res)
1909 return res;
1910
1911 /* less common attributes */
1912 if (strcmp(name, "tail") == 0) {
1913 PyErr_Clear();
1914 res = element_get_tail(self);
1915 } else if (strcmp(name, "attrib") == 0) {
1916 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001917 if (!self->extra) {
1918 if (create_extra(self, NULL) < 0)
1919 return NULL;
1920 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001921 res = element_get_attrib(self);
1922 }
1923
1924 if (!res)
1925 return NULL;
1926
1927 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001928 return res;
1929}
1930
Eli Benderskyef9683b2013-05-18 07:52:34 -07001931static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001932element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001933{
Eli Benderskyb20df952012-05-20 06:33:29 +03001934 char *name = "";
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001935
1936 if (value == NULL) {
1937 PyErr_SetString(PyExc_AttributeError,
1938 "can't delete attribute");
1939 return -1;
1940 }
Eli Benderskyb20df952012-05-20 06:33:29 +03001941 if (PyUnicode_Check(nameobj))
1942 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001943 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001944 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001945
1946 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001947 Py_DECREF(self->tag);
1948 self->tag = value;
1949 Py_INCREF(self->tag);
1950 } else if (strcmp(name, "text") == 0) {
1951 Py_DECREF(JOIN_OBJ(self->text));
1952 self->text = value;
1953 Py_INCREF(self->text);
1954 } else if (strcmp(name, "tail") == 0) {
1955 Py_DECREF(JOIN_OBJ(self->tail));
1956 self->tail = value;
1957 Py_INCREF(self->tail);
1958 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001959 if (!self->extra) {
1960 if (create_extra(self, NULL) < 0)
1961 return -1;
1962 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001963 Py_DECREF(self->extra->attrib);
1964 self->extra->attrib = value;
1965 Py_INCREF(self->extra->attrib);
1966 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001967 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001968 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001969 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001970 }
1971
Eli Benderskyef9683b2013-05-18 07:52:34 -07001972 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001973}
1974
1975static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001976 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001977 0, /* sq_concat */
1978 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001979 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001980 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001981 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001982 0,
1983};
1984
Eli Bendersky64d11e62012-06-15 07:42:50 +03001985/******************************* Element iterator ****************************/
1986
1987/* ElementIterObject represents the iteration state over an XML element in
1988 * pre-order traversal. To keep track of which sub-element should be returned
1989 * next, a stack of parents is maintained. This is a standard stack-based
1990 * iterative pre-order traversal of a tree.
1991 * The stack is managed using a single-linked list starting at parent_stack.
1992 * Each stack node contains the saved parent to which we should return after
1993 * the current one is exhausted, and the next child to examine in that parent.
1994 */
1995typedef struct ParentLocator_t {
1996 ElementObject *parent;
1997 Py_ssize_t child_index;
1998 struct ParentLocator_t *next;
1999} ParentLocator;
2000
2001typedef struct {
2002 PyObject_HEAD
2003 ParentLocator *parent_stack;
2004 ElementObject *root_element;
2005 PyObject *sought_tag;
2006 int root_done;
2007 int gettext;
2008} ElementIterObject;
2009
2010
2011static void
2012elementiter_dealloc(ElementIterObject *it)
2013{
2014 ParentLocator *p = it->parent_stack;
2015 while (p) {
2016 ParentLocator *temp = p;
2017 Py_XDECREF(p->parent);
2018 p = p->next;
2019 PyObject_Free(temp);
2020 }
2021
2022 Py_XDECREF(it->sought_tag);
2023 Py_XDECREF(it->root_element);
2024
2025 PyObject_GC_UnTrack(it);
2026 PyObject_GC_Del(it);
2027}
2028
2029static int
2030elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2031{
2032 ParentLocator *p = it->parent_stack;
2033 while (p) {
2034 Py_VISIT(p->parent);
2035 p = p->next;
2036 }
2037
2038 Py_VISIT(it->root_element);
2039 Py_VISIT(it->sought_tag);
2040 return 0;
2041}
2042
2043/* Helper function for elementiter_next. Add a new parent to the parent stack.
2044 */
2045static ParentLocator *
2046parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2047{
2048 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2049 if (new_node) {
2050 new_node->parent = parent;
2051 Py_INCREF(parent);
2052 new_node->child_index = 0;
2053 new_node->next = stack;
2054 }
2055 return new_node;
2056}
2057
2058static PyObject *
2059elementiter_next(ElementIterObject *it)
2060{
2061 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002062 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002063 * A short note on gettext: this function serves both the iter() and
2064 * itertext() methods to avoid code duplication. However, there are a few
2065 * small differences in the way these iterations work. Namely:
2066 * - itertext() only yields text from nodes that have it, and continues
2067 * iterating when a node doesn't have text (so it doesn't return any
2068 * node like iter())
2069 * - itertext() also has to handle tail, after finishing with all the
2070 * children of a node.
2071 */
Eli Bendersky113da642012-06-15 07:52:49 +03002072 ElementObject *cur_parent;
2073 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002074 int rc;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002075 ElementObject *elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002076
2077 while (1) {
2078 /* Handle the case reached in the beginning and end of iteration, where
2079 * the parent stack is empty. The root_done flag gives us indication
2080 * whether we've just started iterating (so root_done is 0), in which
2081 * case the root is returned. If root_done is 1 and we're here, the
2082 * iterator is exhausted.
2083 */
2084 if (!it->parent_stack->parent) {
2085 if (it->root_done) {
2086 PyErr_SetNone(PyExc_StopIteration);
2087 return NULL;
2088 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002089 elem = it->root_element;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002091 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002092 if (!it->parent_stack) {
2093 PyErr_NoMemory();
2094 return NULL;
2095 }
2096
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002097 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002099 rc = (it->sought_tag == Py_None);
2100 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002101 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002102 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002103 if (rc < 0) {
2104 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002105 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002106 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002107 }
2108 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002109 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002110 PyObject *text = element_get_text(elem);
2111 if (!text) {
2112 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002113 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002114 }
2115 Py_INCREF(text);
2116 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002117 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002118 if (rc > 0)
2119 return text;
2120 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002121 if (rc < 0)
2122 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002123 } else {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002124 return (PyObject *)elem;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002125 }
2126 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002127 else {
2128 Py_DECREF(elem);
2129 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002130 }
2131 }
2132
2133 /* See if there are children left to traverse in the current parent. If
2134 * yes, visit the next child. If not, pop the stack and try again.
2135 */
Eli Bendersky113da642012-06-15 07:52:49 +03002136 cur_parent = it->parent_stack->parent;
2137 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002138 if (cur_parent->extra && child_index < cur_parent->extra->length) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002139 elem = (ElementObject *)cur_parent->extra->children[child_index];
Eli Bendersky64d11e62012-06-15 07:42:50 +03002140 it->parent_stack->child_index++;
2141 it->parent_stack = parent_stack_push_new(it->parent_stack,
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002142 elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002143 if (!it->parent_stack) {
2144 PyErr_NoMemory();
2145 return NULL;
2146 }
2147
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002148 Py_INCREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002150 PyObject *text = element_get_text(elem);
2151 if (!text) {
2152 Py_DECREF(elem);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002153 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002154 }
2155 Py_INCREF(text);
2156 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002157 rc = PyObject_IsTrue(text);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002158 if (rc > 0)
2159 return text;
2160 Py_DECREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002161 if (rc < 0)
2162 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002163 } else {
2164 rc = (it->sought_tag == Py_None);
2165 if (!rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002166 rc = PyObject_RichCompareBool(elem->tag,
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002167 it->sought_tag, Py_EQ);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002168 if (rc < 0) {
2169 Py_DECREF(elem);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002170 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002171 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002172 }
2173 if (rc) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002174 return (PyObject *)elem;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002175 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002176 Py_DECREF(elem);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002177 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002178 }
2179 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002180 PyObject *tail;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002181 ParentLocator *next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002182 if (it->gettext) {
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002183 Py_INCREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002184 tail = element_get_tail(cur_parent);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002185 if (!tail) {
2186 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002187 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002188 }
2189 Py_INCREF(tail);
2190 Py_DECREF(cur_parent);
Eli Benderskye6174ca2013-01-10 06:27:53 -08002191 }
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002192 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002193 tail = Py_None;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002194 Py_INCREF(tail);
2195 }
2196 next = it->parent_stack->next;
2197 cur_parent = it->parent_stack->parent;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002198 PyObject_Free(it->parent_stack);
2199 it->parent_stack = next;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002200 Py_XDECREF(cur_parent);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002201
2202 /* Note that extra condition on it->parent_stack->parent here;
2203 * this is because itertext() is supposed to only return *inner*
2204 * text, not text following the element it began iteration with.
2205 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002206 if (it->parent_stack->parent) {
2207 rc = PyObject_IsTrue(tail);
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002208 if (rc > 0)
2209 return tail;
2210 Py_DECREF(tail);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002211 if (rc < 0)
2212 return NULL;
Serhiy Storchaka66c08d92015-12-21 11:09:48 +02002213 }
2214 else {
2215 Py_DECREF(tail);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002216 }
2217 }
2218 }
2219
2220 return NULL;
2221}
2222
2223
2224static PyTypeObject ElementIter_Type = {
2225 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002226 /* Using the module's name since the pure-Python implementation does not
2227 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002228 "_elementtree._element_iterator", /* tp_name */
2229 sizeof(ElementIterObject), /* tp_basicsize */
2230 0, /* tp_itemsize */
2231 /* methods */
2232 (destructor)elementiter_dealloc, /* tp_dealloc */
2233 0, /* tp_print */
2234 0, /* tp_getattr */
2235 0, /* tp_setattr */
2236 0, /* tp_reserved */
2237 0, /* tp_repr */
2238 0, /* tp_as_number */
2239 0, /* tp_as_sequence */
2240 0, /* tp_as_mapping */
2241 0, /* tp_hash */
2242 0, /* tp_call */
2243 0, /* tp_str */
2244 0, /* tp_getattro */
2245 0, /* tp_setattro */
2246 0, /* tp_as_buffer */
2247 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2248 0, /* tp_doc */
2249 (traverseproc)elementiter_traverse, /* tp_traverse */
2250 0, /* tp_clear */
2251 0, /* tp_richcompare */
2252 0, /* tp_weaklistoffset */
2253 PyObject_SelfIter, /* tp_iter */
2254 (iternextfunc)elementiter_next, /* tp_iternext */
2255 0, /* tp_methods */
2256 0, /* tp_members */
2257 0, /* tp_getset */
2258 0, /* tp_base */
2259 0, /* tp_dict */
2260 0, /* tp_descr_get */
2261 0, /* tp_descr_set */
2262 0, /* tp_dictoffset */
2263 0, /* tp_init */
2264 0, /* tp_alloc */
2265 0, /* tp_new */
2266};
2267
2268
2269static PyObject *
2270create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2271{
2272 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002273
2274 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2275 if (!it)
2276 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002277
Victor Stinner4d463432013-07-11 23:05:03 +02002278 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002279 it->sought_tag = tag;
2280 it->root_done = 0;
2281 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002282 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002283 it->root_element = self;
2284
Eli Bendersky64d11e62012-06-15 07:42:50 +03002285 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002286
2287 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2288 if (it->parent_stack == NULL) {
2289 Py_DECREF(it);
2290 PyErr_NoMemory();
2291 return NULL;
2292 }
2293 it->parent_stack->parent = NULL;
2294 it->parent_stack->child_index = 0;
2295 it->parent_stack->next = NULL;
2296
Eli Bendersky64d11e62012-06-15 07:42:50 +03002297 return (PyObject *)it;
2298}
2299
2300
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002301/* ==================================================================== */
2302/* the tree builder type */
2303
2304typedef struct {
2305 PyObject_HEAD
2306
Eli Bendersky58d548d2012-05-29 15:45:16 +03002307 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002308
Antoine Pitrouee329312012-10-04 19:53:29 +02002309 PyObject *this; /* current node */
2310 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002311
Eli Bendersky58d548d2012-05-29 15:45:16 +03002312 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002313
Eli Bendersky58d548d2012-05-29 15:45:16 +03002314 PyObject *stack; /* element stack */
2315 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002316
Eli Bendersky48d358b2012-05-30 17:57:50 +03002317 PyObject *element_factory;
2318
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002319 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002320 PyObject *events; /* list of events, or NULL if not collecting */
2321 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2322 PyObject *end_event_obj;
2323 PyObject *start_ns_event_obj;
2324 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325} TreeBuilderObject;
2326
/* True if op is exactly a TreeBuilder instance (subclasses do not match). */
#define TreeBuilder_CheckExact(op) \
    (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328
2329/* -------------------------------------------------------------------- */
2330/* constructor and destructor */
2331
Eli Bendersky58d548d2012-05-29 15:45:16 +03002332static PyObject *
2333treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002335 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2336 if (t != NULL) {
2337 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002338
Eli Bendersky58d548d2012-05-29 15:45:16 +03002339 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002340 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002341 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002342 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002343
Eli Bendersky58d548d2012-05-29 15:45:16 +03002344 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002345 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002346 t->stack = PyList_New(20);
2347 if (!t->stack) {
2348 Py_DECREF(t->this);
2349 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002350 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002351 return NULL;
2352 }
2353 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 t->events = NULL;
2356 t->start_event_obj = t->end_event_obj = NULL;
2357 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2358 }
2359 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002360}
2361
Serhiy Storchakacb985562015-05-04 15:32:48 +03002362/*[clinic input]
2363_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002364
Serhiy Storchakacb985562015-05-04 15:32:48 +03002365 element_factory: object = NULL
2366
2367[clinic start generated code]*/
2368
2369static int
2370_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2371 PyObject *element_factory)
2372/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2373{
2374 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002375
2376 if (element_factory) {
2377 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002378 tmp = self->element_factory;
2379 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002380 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002381 }
2382
Eli Bendersky58d548d2012-05-29 15:45:16 +03002383 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384}
2385
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386static int
2387treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2388{
2389 Py_VISIT(self->root);
2390 Py_VISIT(self->this);
2391 Py_VISIT(self->last);
2392 Py_VISIT(self->data);
2393 Py_VISIT(self->stack);
2394 Py_VISIT(self->element_factory);
2395 return 0;
2396}
2397
2398static int
2399treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002400{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002401 Py_CLEAR(self->end_ns_event_obj);
2402 Py_CLEAR(self->start_ns_event_obj);
2403 Py_CLEAR(self->end_event_obj);
2404 Py_CLEAR(self->start_event_obj);
2405 Py_CLEAR(self->events);
2406 Py_CLEAR(self->stack);
2407 Py_CLEAR(self->data);
2408 Py_CLEAR(self->last);
2409 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002410 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002411 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002412 return 0;
2413}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002414
Eli Bendersky48d358b2012-05-30 17:57:50 +03002415static void
2416treebuilder_dealloc(TreeBuilderObject *self)
2417{
2418 PyObject_GC_UnTrack(self);
2419 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002420 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002421}
2422
2423/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002424/* helpers for handling of arbitrary element-like objects */
2425
2426static int
2427treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2428 PyObject **dest, _Py_Identifier *name)
2429{
2430 if (Element_CheckExact(element)) {
2431 Py_DECREF(JOIN_OBJ(*dest));
2432 *dest = JOIN_SET(data, PyList_CheckExact(data));
2433 return 0;
2434 }
2435 else {
2436 PyObject *joined = list_join(data);
2437 int r;
2438 if (joined == NULL)
2439 return -1;
2440 r = _PyObject_SetAttrId(element, name, joined);
2441 Py_DECREF(joined);
2442 return r;
2443 }
2444}
2445
2446/* These two functions steal a reference to data */
2447static int
2448treebuilder_set_element_text(PyObject *element, PyObject *data)
2449{
2450 _Py_IDENTIFIER(text);
2451 return treebuilder_set_element_text_or_tail(
2452 element, data, &((ElementObject *) element)->text, &PyId_text);
2453}
2454
2455static int
2456treebuilder_set_element_tail(PyObject *element, PyObject *data)
2457{
2458 _Py_IDENTIFIER(tail);
2459 return treebuilder_set_element_text_or_tail(
2460 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2461}
2462
2463static int
2464treebuilder_add_subelement(PyObject *element, PyObject *child)
2465{
2466 _Py_IDENTIFIER(append);
2467 if (Element_CheckExact(element)) {
2468 ElementObject *elem = (ElementObject *) element;
2469 return element_add_subelement(elem, child);
2470 }
2471 else {
2472 PyObject *res;
2473 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2474 if (res == NULL)
2475 return -1;
2476 Py_DECREF(res);
2477 return 0;
2478 }
2479}
2480
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002481LOCAL(int)
2482treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2483 PyObject *node)
2484{
2485 if (action != NULL) {
2486 PyObject *res = PyTuple_Pack(2, action, node);
2487 if (res == NULL)
2488 return -1;
2489 if (PyList_Append(self->events, res) < 0) {
2490 Py_DECREF(res);
2491 return -1;
2492 }
2493 Py_DECREF(res);
2494 }
2495 return 0;
2496}
2497
Antoine Pitrouee329312012-10-04 19:53:29 +02002498/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499/* handlers */
2500
2501LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002502treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2503 PyObject* attrib)
2504{
2505 PyObject* node;
2506 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002507 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002508
2509 if (self->data) {
2510 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002511 if (treebuilder_set_element_text(self->last, self->data))
2512 return NULL;
2513 }
2514 else {
2515 if (treebuilder_set_element_tail(self->last, self->data))
2516 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517 }
2518 self->data = NULL;
2519 }
2520
Eli Bendersky08231a92013-05-18 15:47:16 -07002521 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002522 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2523 } else {
2524 node = create_new_element(tag, attrib);
2525 }
2526 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002528 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002529
Antoine Pitrouee329312012-10-04 19:53:29 +02002530 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002531
2532 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002533 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002534 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002535 } else {
2536 if (self->root) {
2537 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002538 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002539 "multiple elements on top level"
2540 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002541 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002542 }
2543 Py_INCREF(node);
2544 self->root = node;
2545 }
2546
2547 if (self->index < PyList_GET_SIZE(self->stack)) {
2548 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002549 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002550 Py_INCREF(this);
2551 } else {
2552 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002553 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002554 }
2555 self->index++;
2556
2557 Py_DECREF(this);
2558 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002559 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560
2561 Py_DECREF(self->last);
2562 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002563 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002565 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2566 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002567
2568 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002569
2570 error:
2571 Py_DECREF(node);
2572 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002573}
2574
2575LOCAL(PyObject*)
2576treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2577{
2578 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002579 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002580 /* ignore calls to data before the first call to start */
2581 Py_RETURN_NONE;
2582 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002583 /* store the first item as is */
2584 Py_INCREF(data); self->data = data;
2585 } else {
2586 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002587 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2588 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002589 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002590 /* expat often generates single character data sections; handle
2591 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002592 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2593 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002595 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002596 } else if (PyList_CheckExact(self->data)) {
2597 if (PyList_Append(self->data, data) < 0)
2598 return NULL;
2599 } else {
2600 PyObject* list = PyList_New(2);
2601 if (!list)
2602 return NULL;
2603 PyList_SET_ITEM(list, 0, self->data);
2604 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2605 self->data = list;
2606 }
2607 }
2608
2609 Py_RETURN_NONE;
2610}
2611
2612LOCAL(PyObject*)
2613treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2614{
2615 PyObject* item;
2616
2617 if (self->data) {
2618 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002619 if (treebuilder_set_element_text(self->last, self->data))
2620 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002621 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002622 if (treebuilder_set_element_tail(self->last, self->data))
2623 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002624 }
2625 self->data = NULL;
2626 }
2627
2628 if (self->index == 0) {
2629 PyErr_SetString(
2630 PyExc_IndexError,
2631 "pop from empty stack"
2632 );
2633 return NULL;
2634 }
2635
2636 self->index--;
2637
2638 item = PyList_GET_ITEM(self->stack, self->index);
2639 Py_INCREF(item);
2640
2641 Py_DECREF(self->last);
2642
Antoine Pitrouee329312012-10-04 19:53:29 +02002643 self->last = self->this;
2644 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002645
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002646 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2647 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648
2649 Py_INCREF(self->last);
2650 return (PyObject*) self->last;
2651}
2652
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653/* -------------------------------------------------------------------- */
2654/* methods (in alphabetical order) */
2655
Serhiy Storchakacb985562015-05-04 15:32:48 +03002656/*[clinic input]
2657_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002658
Serhiy Storchakacb985562015-05-04 15:32:48 +03002659 data: object
2660 /
2661
2662[clinic start generated code]*/
2663
2664static PyObject *
2665_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2666/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2667{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668 return treebuilder_handle_data(self, data);
2669}
2670
Serhiy Storchakacb985562015-05-04 15:32:48 +03002671/*[clinic input]
2672_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002673
Serhiy Storchakacb985562015-05-04 15:32:48 +03002674 tag: object
2675 /
2676
2677[clinic start generated code]*/
2678
2679static PyObject *
2680_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2681/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2682{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002683 return treebuilder_handle_end(self, tag);
2684}
2685
2686LOCAL(PyObject*)
2687treebuilder_done(TreeBuilderObject* self)
2688{
2689 PyObject* res;
2690
2691 /* FIXME: check stack size? */
2692
2693 if (self->root)
2694 res = self->root;
2695 else
2696 res = Py_None;
2697
2698 Py_INCREF(res);
2699 return res;
2700}
2701
Serhiy Storchakacb985562015-05-04 15:32:48 +03002702/*[clinic input]
2703_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002704
Serhiy Storchakacb985562015-05-04 15:32:48 +03002705[clinic start generated code]*/
2706
2707static PyObject *
2708_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2709/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2710{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711 return treebuilder_done(self);
2712}
2713
Serhiy Storchakacb985562015-05-04 15:32:48 +03002714/*[clinic input]
2715_elementtree.TreeBuilder.start
2716
2717 tag: object
2718 attrs: object = None
2719 /
2720
2721[clinic start generated code]*/
2722
2723static PyObject *
2724_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2725 PyObject *attrs)
2726/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002727{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002728 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002729}
2730
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002731/* ==================================================================== */
2732/* the expat interface */
2733
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002734#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002736
2737/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2738 * cached globally without being in per-module state.
2739 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002740static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002741#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742
Eli Bendersky52467b12012-06-01 07:13:08 +03002743static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2744 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2745
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746typedef struct {
2747 PyObject_HEAD
2748
2749 XML_Parser parser;
2750
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002751 PyObject *target;
2752 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002753
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002754 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002755
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002756 PyObject *handle_start;
2757 PyObject *handle_data;
2758 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002760 PyObject *handle_comment;
2761 PyObject *handle_pi;
2762 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002764 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002765
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002766} XMLParserObject;
2767
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002768static PyObject*
2769_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2770static PyObject *
2771_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2772 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002773
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002774/* helpers */
2775
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776LOCAL(PyObject*)
2777makeuniversal(XMLParserObject* self, const char* string)
2778{
2779 /* convert a UTF-8 tag/attribute name from the expat parser
2780 to a universal name string */
2781
Antoine Pitrouc1948842012-10-01 23:40:37 +02002782 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783 PyObject* key;
2784 PyObject* value;
2785
2786 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002787 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002788 if (!key)
2789 return NULL;
2790
2791 value = PyDict_GetItem(self->names, key);
2792
2793 if (value) {
2794 Py_INCREF(value);
2795 } else {
2796 /* new name. convert to universal name, and decode as
2797 necessary */
2798
2799 PyObject* tag;
2800 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002801 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002802
2803 /* look for namespace separator */
2804 for (i = 0; i < size; i++)
2805 if (string[i] == '}')
2806 break;
2807 if (i != size) {
2808 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002809 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002810 if (tag == NULL) {
2811 Py_DECREF(key);
2812 return NULL;
2813 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002814 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002815 p[0] = '{';
2816 memcpy(p+1, string, size);
2817 size++;
2818 } else {
2819 /* plain name; use key as tag */
2820 Py_INCREF(key);
2821 tag = key;
2822 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002823
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002824 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002825 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002826 value = PyUnicode_DecodeUTF8(p, size, "strict");
2827 Py_DECREF(tag);
2828 if (!value) {
2829 Py_DECREF(key);
2830 return NULL;
2831 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002832
2833 /* add to names dictionary */
2834 if (PyDict_SetItem(self->names, key, value) < 0) {
2835 Py_DECREF(key);
2836 Py_DECREF(value);
2837 return NULL;
2838 }
2839 }
2840
2841 Py_DECREF(key);
2842 return value;
2843}
2844
Eli Bendersky5b77d812012-03-16 08:20:05 +02002845/* Set the ParseError exception with the given parameters.
2846 * If message is not NULL, it's used as the error string. Otherwise, the
2847 * message string is the default for the given error_code.
2848*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002849static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002850expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2851 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002852{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002853 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002854 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002855
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002856 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002857 message ? message : EXPAT(ErrorString)(error_code),
2858 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002859 if (errmsg == NULL)
2860 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002861
Eli Bendersky532d03e2013-08-10 08:00:39 -07002862 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002863 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002864 if (!error)
2865 return;
2866
Eli Bendersky5b77d812012-03-16 08:20:05 +02002867 /* Add code and position attributes */
2868 code = PyLong_FromLong((long)error_code);
2869 if (!code) {
2870 Py_DECREF(error);
2871 return;
2872 }
2873 if (PyObject_SetAttrString(error, "code", code) == -1) {
2874 Py_DECREF(error);
2875 Py_DECREF(code);
2876 return;
2877 }
2878 Py_DECREF(code);
2879
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002880 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002881 if (!position) {
2882 Py_DECREF(error);
2883 return;
2884 }
2885 if (PyObject_SetAttrString(error, "position", position) == -1) {
2886 Py_DECREF(error);
2887 Py_DECREF(position);
2888 return;
2889 }
2890 Py_DECREF(position);
2891
Eli Bendersky532d03e2013-08-10 08:00:39 -07002892 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002893 Py_DECREF(error);
2894}
2895
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002896/* -------------------------------------------------------------------- */
2897/* handlers */
2898
2899static void
2900expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2901 int data_len)
2902{
2903 PyObject* key;
2904 PyObject* value;
2905 PyObject* res;
2906
2907 if (data_len < 2 || data_in[0] != '&')
2908 return;
2909
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002910 if (PyErr_Occurred())
2911 return;
2912
Neal Norwitz0269b912007-08-08 06:56:02 +00002913 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002914 if (!key)
2915 return;
2916
2917 value = PyDict_GetItem(self->entity, key);
2918
2919 if (value) {
2920 if (TreeBuilder_CheckExact(self->target))
2921 res = treebuilder_handle_data(
2922 (TreeBuilderObject*) self->target, value
2923 );
2924 else if (self->handle_data)
2925 res = PyObject_CallFunction(self->handle_data, "O", value);
2926 else
2927 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002928 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002929 } else if (!PyErr_Occurred()) {
2930 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002931 char message[128] = "undefined entity ";
2932 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002933 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002934 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002935 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002936 EXPAT(GetErrorColumnNumber)(self->parser),
2937 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002938 );
2939 }
2940
2941 Py_DECREF(key);
2942}
2943
2944static void
2945expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2946 const XML_Char **attrib_in)
2947{
2948 PyObject* res;
2949 PyObject* tag;
2950 PyObject* attrib;
2951 int ok;
2952
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002953 if (PyErr_Occurred())
2954 return;
2955
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 /* tag name */
2957 tag = makeuniversal(self, tag_in);
2958 if (!tag)
2959 return; /* parser will look for errors */
2960
2961 /* attributes */
2962 if (attrib_in[0]) {
2963 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002964 if (!attrib) {
2965 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002966 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002967 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002968 while (attrib_in[0] && attrib_in[1]) {
2969 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002970 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002971 if (!key || !value) {
2972 Py_XDECREF(value);
2973 Py_XDECREF(key);
2974 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002975 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002976 return;
2977 }
2978 ok = PyDict_SetItem(attrib, key, value);
2979 Py_DECREF(value);
2980 Py_DECREF(key);
2981 if (ok < 0) {
2982 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002983 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002984 return;
2985 }
2986 attrib_in += 2;
2987 }
2988 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002989 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002990 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002991 if (!attrib) {
2992 Py_DECREF(tag);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002993 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002994 }
Eli Bendersky48d358b2012-05-30 17:57:50 +03002995 }
2996
2997 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002998 /* shortcut */
2999 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3000 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003001 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003002 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003003 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003004 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003005 res = NULL;
3006
3007 Py_DECREF(tag);
3008 Py_DECREF(attrib);
3009
3010 Py_XDECREF(res);
3011}
3012
3013static void
3014expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3015 int data_len)
3016{
3017 PyObject* data;
3018 PyObject* res;
3019
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003020 if (PyErr_Occurred())
3021 return;
3022
Neal Norwitz0269b912007-08-08 06:56:02 +00003023 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003024 if (!data)
3025 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003026
3027 if (TreeBuilder_CheckExact(self->target))
3028 /* shortcut */
3029 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3030 else if (self->handle_data)
3031 res = PyObject_CallFunction(self->handle_data, "O", data);
3032 else
3033 res = NULL;
3034
3035 Py_DECREF(data);
3036
3037 Py_XDECREF(res);
3038}
3039
3040static void
3041expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3042{
3043 PyObject* tag;
3044 PyObject* res = NULL;
3045
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003046 if (PyErr_Occurred())
3047 return;
3048
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003049 if (TreeBuilder_CheckExact(self->target))
3050 /* shortcut */
3051 /* the standard tree builder doesn't look at the end tag */
3052 res = treebuilder_handle_end(
3053 (TreeBuilderObject*) self->target, Py_None
3054 );
3055 else if (self->handle_end) {
3056 tag = makeuniversal(self, tag_in);
3057 if (tag) {
3058 res = PyObject_CallFunction(self->handle_end, "O", tag);
3059 Py_DECREF(tag);
3060 }
3061 }
3062
3063 Py_XDECREF(res);
3064}
3065
3066static void
3067expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3068 const XML_Char *uri)
3069{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003070 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3071 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003072
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003073 if (PyErr_Occurred())
3074 return;
3075
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003076 if (!target->events || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003077 return;
3078
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003079 if (!uri)
3080 uri = "";
3081 if (!prefix)
3082 prefix = "";
3083
3084 parcel = Py_BuildValue("ss", prefix, uri);
3085 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003086 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003087 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3088 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003089}
3090
3091static void
3092expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3093{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003094 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3095
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003096 if (PyErr_Occurred())
3097 return;
3098
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003099 if (!target->events)
3100 return;
3101
3102 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103}
3104
3105static void
3106expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3107{
3108 PyObject* comment;
3109 PyObject* res;
3110
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003111 if (PyErr_Occurred())
3112 return;
3113
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003114 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003115 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003116 if (comment) {
3117 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3118 Py_XDECREF(res);
3119 Py_DECREF(comment);
3120 }
3121 }
3122}
3123
Eli Bendersky45839902013-01-13 05:14:47 -08003124static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003125expat_start_doctype_handler(XMLParserObject *self,
3126 const XML_Char *doctype_name,
3127 const XML_Char *sysid,
3128 const XML_Char *pubid,
3129 int has_internal_subset)
3130{
3131 PyObject *self_pyobj = (PyObject *)self;
3132 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3133 PyObject *parser_doctype = NULL;
3134 PyObject *res = NULL;
3135
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003136 if (PyErr_Occurred())
3137 return;
3138
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003139 doctype_name_obj = makeuniversal(self, doctype_name);
3140 if (!doctype_name_obj)
3141 return;
3142
3143 if (sysid) {
3144 sysid_obj = makeuniversal(self, sysid);
3145 if (!sysid_obj) {
3146 Py_DECREF(doctype_name_obj);
3147 return;
3148 }
3149 } else {
3150 Py_INCREF(Py_None);
3151 sysid_obj = Py_None;
3152 }
3153
3154 if (pubid) {
3155 pubid_obj = makeuniversal(self, pubid);
3156 if (!pubid_obj) {
3157 Py_DECREF(doctype_name_obj);
3158 Py_DECREF(sysid_obj);
3159 return;
3160 }
3161 } else {
3162 Py_INCREF(Py_None);
3163 pubid_obj = Py_None;
3164 }
3165
3166 /* If the target has a handler for doctype, call it. */
3167 if (self->handle_doctype) {
3168 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3169 doctype_name_obj, pubid_obj, sysid_obj);
3170 Py_CLEAR(res);
3171 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003172 else {
3173 /* Now see if the parser itself has a doctype method. If yes and it's
3174 * a custom method, call it but warn about deprecation. If it's only
3175 * the vanilla XMLParser method, do nothing.
3176 */
3177 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3178 if (parser_doctype &&
3179 !(PyCFunction_Check(parser_doctype) &&
3180 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3181 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003182 (PyCFunction) _elementtree_XMLParser_doctype)) {
3183 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3184 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003185 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003186 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003187 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003188 res = PyObject_CallFunction(parser_doctype, "OOO",
3189 doctype_name_obj, pubid_obj, sysid_obj);
3190 Py_CLEAR(res);
3191 }
3192 }
3193
3194clear:
3195 Py_XDECREF(parser_doctype);
3196 Py_DECREF(doctype_name_obj);
3197 Py_DECREF(pubid_obj);
3198 Py_DECREF(sysid_obj);
3199}
3200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201static void
3202expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3203 const XML_Char* data_in)
3204{
3205 PyObject* target;
3206 PyObject* data;
3207 PyObject* res;
3208
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003209 if (PyErr_Occurred())
3210 return;
3211
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003213 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3214 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215 if (target && data) {
3216 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3217 Py_XDECREF(res);
3218 Py_DECREF(data);
3219 Py_DECREF(target);
3220 } else {
3221 Py_XDECREF(data);
3222 Py_XDECREF(target);
3223 }
3224 }
3225}
3226
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003227/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228
Eli Bendersky52467b12012-06-01 07:13:08 +03003229static PyObject *
3230xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231{
Eli Bendersky52467b12012-06-01 07:13:08 +03003232 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3233 if (self) {
3234 self->parser = NULL;
3235 self->target = self->entity = self->names = NULL;
3236 self->handle_start = self->handle_data = self->handle_end = NULL;
3237 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003238 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 return (PyObject *)self;
3241}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003242
Serhiy Storchakacb985562015-05-04 15:32:48 +03003243/*[clinic input]
3244_elementtree.XMLParser.__init__
3245
3246 html: object = NULL
3247 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003248 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003249
3250[clinic start generated code]*/
3251
Eli Bendersky52467b12012-06-01 07:13:08 +03003252static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003253_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3254 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003255/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003256{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003257 self->entity = PyDict_New();
3258 if (!self->entity)
3259 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003260
Serhiy Storchakacb985562015-05-04 15:32:48 +03003261 self->names = PyDict_New();
3262 if (!self->names) {
3263 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003264 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003265 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003266
Serhiy Storchakacb985562015-05-04 15:32:48 +03003267 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3268 if (!self->parser) {
3269 Py_CLEAR(self->entity);
3270 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003272 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003273 }
3274
Eli Bendersky52467b12012-06-01 07:13:08 +03003275 if (target) {
3276 Py_INCREF(target);
3277 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003278 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003280 Py_CLEAR(self->entity);
3281 Py_CLEAR(self->names);
3282 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003283 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003285 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003286 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003287
Serhiy Storchakacb985562015-05-04 15:32:48 +03003288 self->handle_start = PyObject_GetAttrString(target, "start");
3289 self->handle_data = PyObject_GetAttrString(target, "data");
3290 self->handle_end = PyObject_GetAttrString(target, "end");
3291 self->handle_comment = PyObject_GetAttrString(target, "comment");
3292 self->handle_pi = PyObject_GetAttrString(target, "pi");
3293 self->handle_close = PyObject_GetAttrString(target, "close");
3294 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295
3296 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003297
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003299 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003301 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 (XML_StartElementHandler) expat_start_handler,
3303 (XML_EndElementHandler) expat_end_handler
3304 );
3305 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003306 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 (XML_DefaultHandler) expat_default_handler
3308 );
3309 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003310 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 (XML_CharacterDataHandler) expat_data_handler
3312 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003313 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003315 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003316 (XML_CommentHandler) expat_comment_handler
3317 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003320 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321 (XML_ProcessingInstructionHandler) expat_pi_handler
3322 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003323 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003324 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003325 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3326 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003327 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003328 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003329 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003330 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331
Eli Bendersky52467b12012-06-01 07:13:08 +03003332 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333}
3334
Eli Bendersky52467b12012-06-01 07:13:08 +03003335static int
3336xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3337{
3338 Py_VISIT(self->handle_close);
3339 Py_VISIT(self->handle_pi);
3340 Py_VISIT(self->handle_comment);
3341 Py_VISIT(self->handle_end);
3342 Py_VISIT(self->handle_data);
3343 Py_VISIT(self->handle_start);
3344
3345 Py_VISIT(self->target);
3346 Py_VISIT(self->entity);
3347 Py_VISIT(self->names);
3348
3349 return 0;
3350}
3351
3352static int
3353xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354{
3355 EXPAT(ParserFree)(self->parser);
3356
Antoine Pitrouc1948842012-10-01 23:40:37 +02003357 Py_CLEAR(self->handle_close);
3358 Py_CLEAR(self->handle_pi);
3359 Py_CLEAR(self->handle_comment);
3360 Py_CLEAR(self->handle_end);
3361 Py_CLEAR(self->handle_data);
3362 Py_CLEAR(self->handle_start);
3363 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003364
Antoine Pitrouc1948842012-10-01 23:40:37 +02003365 Py_CLEAR(self->target);
3366 Py_CLEAR(self->entity);
3367 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003368
Eli Bendersky52467b12012-06-01 07:13:08 +03003369 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370}
3371
Eli Bendersky52467b12012-06-01 07:13:08 +03003372static void
3373xmlparser_dealloc(XMLParserObject* self)
3374{
3375 PyObject_GC_UnTrack(self);
3376 xmlparser_gc_clear(self);
3377 Py_TYPE(self)->tp_free((PyObject *)self);
3378}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003379
3380LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003381expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003382{
3383 int ok;
3384
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003385 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003386 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3387
3388 if (PyErr_Occurred())
3389 return NULL;
3390
3391 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003392 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003393 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003394 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003395 EXPAT(GetErrorColumnNumber)(self->parser),
3396 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003397 );
3398 return NULL;
3399 }
3400
3401 Py_RETURN_NONE;
3402}
3403
Serhiy Storchakacb985562015-05-04 15:32:48 +03003404/*[clinic input]
3405_elementtree.XMLParser.close
3406
3407[clinic start generated code]*/
3408
3409static PyObject *
3410_elementtree_XMLParser_close_impl(XMLParserObject *self)
3411/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003412{
3413 /* end feeding data to parser */
3414
3415 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003417 if (!res)
3418 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003419
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003420 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003421 Py_DECREF(res);
3422 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003423 }
3424 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003425 Py_DECREF(res);
3426 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003427 }
3428 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003429 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003430 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003431}
3432
Serhiy Storchakacb985562015-05-04 15:32:48 +03003433/*[clinic input]
3434_elementtree.XMLParser.feed
3435
3436 data: object
3437 /
3438
3439[clinic start generated code]*/
3440
3441static PyObject *
3442_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3443/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003444{
3445 /* feed data to parser */
3446
Serhiy Storchakacb985562015-05-04 15:32:48 +03003447 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003448 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003449 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3450 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003451 return NULL;
3452 if (data_len > INT_MAX) {
3453 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3454 return NULL;
3455 }
3456 /* Explicitly set UTF-8 encoding. Return code ignored. */
3457 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003458 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003459 }
3460 else {
3461 Py_buffer view;
3462 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003463 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003464 return NULL;
3465 if (view.len > INT_MAX) {
3466 PyBuffer_Release(&view);
3467 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3468 return NULL;
3469 }
3470 res = expat_parse(self, view.buf, (int)view.len, 0);
3471 PyBuffer_Release(&view);
3472 return res;
3473 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003474}
3475
Serhiy Storchakacb985562015-05-04 15:32:48 +03003476/*[clinic input]
3477_elementtree.XMLParser._parse_whole
3478
3479 file: object
3480 /
3481
3482[clinic start generated code]*/
3483
3484static PyObject *
3485_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3486/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003487{
Eli Benderskya3699232013-05-19 18:47:23 -07003488 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003489 PyObject* reader;
3490 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003491 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003492 PyObject* res;
3493
Serhiy Storchakacb985562015-05-04 15:32:48 +03003494 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003495 if (!reader)
3496 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003497
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003498 /* read from open file object */
3499 for (;;) {
3500
3501 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3502
3503 if (!buffer) {
3504 /* read failed (e.g. due to KeyboardInterrupt) */
3505 Py_DECREF(reader);
3506 return NULL;
3507 }
3508
Eli Benderskyf996e772012-03-16 05:53:30 +02003509 if (PyUnicode_CheckExact(buffer)) {
3510 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003511 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003512 Py_DECREF(buffer);
3513 break;
3514 }
3515 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003516 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003517 if (!temp) {
3518 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003519 Py_DECREF(reader);
3520 return NULL;
3521 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003522 buffer = temp;
3523 }
3524 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003525 Py_DECREF(buffer);
3526 break;
3527 }
3528
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003529 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3530 Py_DECREF(buffer);
3531 Py_DECREF(reader);
3532 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3533 return NULL;
3534 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003535 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003536 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003537 );
3538
3539 Py_DECREF(buffer);
3540
3541 if (!res) {
3542 Py_DECREF(reader);
3543 return NULL;
3544 }
3545 Py_DECREF(res);
3546
3547 }
3548
3549 Py_DECREF(reader);
3550
3551 res = expat_parse(self, "", 0, 1);
3552
3553 if (res && TreeBuilder_CheckExact(self->target)) {
3554 Py_DECREF(res);
3555 return treebuilder_done((TreeBuilderObject*) self->target);
3556 }
3557
3558 return res;
3559}
3560
Serhiy Storchakacb985562015-05-04 15:32:48 +03003561/*[clinic input]
3562_elementtree.XMLParser.doctype
3563
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003564 name: object
3565 pubid: object
3566 system: object
3567 /
3568
Serhiy Storchakacb985562015-05-04 15:32:48 +03003569[clinic start generated code]*/
3570
3571static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003572_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3573 PyObject *pubid, PyObject *system)
3574/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003575{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003576 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3577 "This method of XMLParser is deprecated. Define"
3578 " doctype() method on the TreeBuilder target.",
3579 1) < 0) {
3580 return NULL;
3581 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003582 Py_RETURN_NONE;
3583}
3584
Serhiy Storchakacb985562015-05-04 15:32:48 +03003585/*[clinic input]
3586_elementtree.XMLParser._setevents
3587
3588 events_queue: object(subclass_of='&PyList_Type')
3589 events_to_report: object = None
3590 /
3591
3592[clinic start generated code]*/
3593
3594static PyObject *
3595_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3596 PyObject *events_queue,
3597 PyObject *events_to_report)
3598/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003599{
3600 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003601 Py_ssize_t i, seqlen;
3602 TreeBuilderObject *target;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003603 PyObject *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003604
3605 if (!TreeBuilder_CheckExact(self->target)) {
3606 PyErr_SetString(
3607 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003608 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 "targets"
3610 );
3611 return NULL;
3612 }
3613
3614 target = (TreeBuilderObject*) self->target;
3615
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003616 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003617 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003618 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003619
3620 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003621 Py_CLEAR(target->start_event_obj);
3622 Py_CLEAR(target->end_event_obj);
3623 Py_CLEAR(target->start_ns_event_obj);
3624 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003625
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003626 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003627 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003628 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003629 Py_RETURN_NONE;
3630 }
3631
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003632 if (!(events_seq = PySequence_Fast(events_to_report,
3633 "events must be a sequence"))) {
3634 return NULL;
3635 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003637 seqlen = PySequence_Size(events_seq);
3638 for (i = 0; i < seqlen; ++i) {
3639 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3640 char *event_name = NULL;
3641 if (PyUnicode_Check(event_name_obj)) {
3642 event_name = _PyUnicode_AsString(event_name_obj);
3643 } else if (PyBytes_Check(event_name_obj)) {
3644 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003645 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003646
3647 if (event_name == NULL) {
3648 Py_DECREF(events_seq);
3649 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3650 return NULL;
3651 } else if (strcmp(event_name, "start") == 0) {
3652 Py_INCREF(event_name_obj);
3653 target->start_event_obj = event_name_obj;
3654 } else if (strcmp(event_name, "end") == 0) {
3655 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003656 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003657 target->end_event_obj = event_name_obj;
3658 } else if (strcmp(event_name, "start-ns") == 0) {
3659 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003660 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003661 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003662 EXPAT(SetNamespaceDeclHandler)(
3663 self->parser,
3664 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3665 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3666 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003667 } else if (strcmp(event_name, "end-ns") == 0) {
3668 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003669 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003670 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671 EXPAT(SetNamespaceDeclHandler)(
3672 self->parser,
3673 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3674 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3675 );
3676 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003677 Py_DECREF(events_seq);
3678 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679 return NULL;
3680 }
3681 }
3682
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003683 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003684 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685}
3686
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003687static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003688xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003689{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003690 if (PyUnicode_Check(nameobj)) {
3691 PyObject* res;
3692 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3693 res = self->entity;
3694 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3695 res = self->target;
3696 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3697 return PyUnicode_FromFormat(
3698 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003699 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003700 }
3701 else
3702 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003703
Alexander Belopolskye239d232010-12-08 23:31:48 +00003704 Py_INCREF(res);
3705 return res;
3706 }
3707 generic:
3708 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003709}
3710
Serhiy Storchakacb985562015-05-04 15:32:48 +03003711#include "clinic/_elementtree.c.h"
3712
3713static PyMethodDef element_methods[] = {
3714
3715 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3716
3717 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3718 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3719
3720 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3721 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3722 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3723
3724 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3725 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3726 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3727 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3728
3729 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3730 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3731 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3732
3733 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3734 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3735
3736 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3737 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3738
3739 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3740
3741 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3742 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3743 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3744 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3745 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3746
3747 {NULL, NULL}
3748};
3749
3750static PyMappingMethods element_as_mapping = {
3751 (lenfunc) element_length,
3752 (binaryfunc) element_subscr,
3753 (objobjargproc) element_ass_subscr,
3754};
3755
3756static PyTypeObject Element_Type = {
3757 PyVarObject_HEAD_INIT(NULL, 0)
3758 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3759 /* methods */
3760 (destructor)element_dealloc, /* tp_dealloc */
3761 0, /* tp_print */
3762 0, /* tp_getattr */
3763 0, /* tp_setattr */
3764 0, /* tp_reserved */
3765 (reprfunc)element_repr, /* tp_repr */
3766 0, /* tp_as_number */
3767 &element_as_sequence, /* tp_as_sequence */
3768 &element_as_mapping, /* tp_as_mapping */
3769 0, /* tp_hash */
3770 0, /* tp_call */
3771 0, /* tp_str */
3772 (getattrofunc)element_getattro, /* tp_getattro */
3773 (setattrofunc)element_setattro, /* tp_setattro */
3774 0, /* tp_as_buffer */
3775 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3776 /* tp_flags */
3777 0, /* tp_doc */
3778 (traverseproc)element_gc_traverse, /* tp_traverse */
3779 (inquiry)element_gc_clear, /* tp_clear */
3780 0, /* tp_richcompare */
3781 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3782 0, /* tp_iter */
3783 0, /* tp_iternext */
3784 element_methods, /* tp_methods */
3785 0, /* tp_members */
3786 0, /* tp_getset */
3787 0, /* tp_base */
3788 0, /* tp_dict */
3789 0, /* tp_descr_get */
3790 0, /* tp_descr_set */
3791 0, /* tp_dictoffset */
3792 (initproc)element_init, /* tp_init */
3793 PyType_GenericAlloc, /* tp_alloc */
3794 element_new, /* tp_new */
3795 0, /* tp_free */
3796};
3797
3798static PyMethodDef treebuilder_methods[] = {
3799 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3800 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3801 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3802 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3803 {NULL, NULL}
3804};
3805
3806static PyTypeObject TreeBuilder_Type = {
3807 PyVarObject_HEAD_INIT(NULL, 0)
3808 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3809 /* methods */
3810 (destructor)treebuilder_dealloc, /* tp_dealloc */
3811 0, /* tp_print */
3812 0, /* tp_getattr */
3813 0, /* tp_setattr */
3814 0, /* tp_reserved */
3815 0, /* tp_repr */
3816 0, /* tp_as_number */
3817 0, /* tp_as_sequence */
3818 0, /* tp_as_mapping */
3819 0, /* tp_hash */
3820 0, /* tp_call */
3821 0, /* tp_str */
3822 0, /* tp_getattro */
3823 0, /* tp_setattro */
3824 0, /* tp_as_buffer */
3825 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3826 /* tp_flags */
3827 0, /* tp_doc */
3828 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3829 (inquiry)treebuilder_gc_clear, /* tp_clear */
3830 0, /* tp_richcompare */
3831 0, /* tp_weaklistoffset */
3832 0, /* tp_iter */
3833 0, /* tp_iternext */
3834 treebuilder_methods, /* tp_methods */
3835 0, /* tp_members */
3836 0, /* tp_getset */
3837 0, /* tp_base */
3838 0, /* tp_dict */
3839 0, /* tp_descr_get */
3840 0, /* tp_descr_set */
3841 0, /* tp_dictoffset */
3842 _elementtree_TreeBuilder___init__, /* tp_init */
3843 PyType_GenericAlloc, /* tp_alloc */
3844 treebuilder_new, /* tp_new */
3845 0, /* tp_free */
3846};
3847
3848static PyMethodDef xmlparser_methods[] = {
3849 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3850 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3851 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3852 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3853 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3854 {NULL, NULL}
3855};
3856
Neal Norwitz227b5332006-03-22 09:28:35 +00003857static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003858 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003859 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003860 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003861 (destructor)xmlparser_dealloc, /* tp_dealloc */
3862 0, /* tp_print */
3863 0, /* tp_getattr */
3864 0, /* tp_setattr */
3865 0, /* tp_reserved */
3866 0, /* tp_repr */
3867 0, /* tp_as_number */
3868 0, /* tp_as_sequence */
3869 0, /* tp_as_mapping */
3870 0, /* tp_hash */
3871 0, /* tp_call */
3872 0, /* tp_str */
3873 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3874 0, /* tp_setattro */
3875 0, /* tp_as_buffer */
3876 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3877 /* tp_flags */
3878 0, /* tp_doc */
3879 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3880 (inquiry)xmlparser_gc_clear, /* tp_clear */
3881 0, /* tp_richcompare */
3882 0, /* tp_weaklistoffset */
3883 0, /* tp_iter */
3884 0, /* tp_iternext */
3885 xmlparser_methods, /* tp_methods */
3886 0, /* tp_members */
3887 0, /* tp_getset */
3888 0, /* tp_base */
3889 0, /* tp_dict */
3890 0, /* tp_descr_get */
3891 0, /* tp_descr_set */
3892 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003893 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003894 PyType_GenericAlloc, /* tp_alloc */
3895 xmlparser_new, /* tp_new */
3896 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003897};
3898
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003899/* ==================================================================== */
3900/* python module interface */
3901
3902static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003903 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003904 {NULL, NULL}
3905};
3906
Martin v. Löwis1a214512008-06-11 05:26:20 +00003907
Eli Bendersky532d03e2013-08-10 08:00:39 -07003908static struct PyModuleDef elementtreemodule = {
3909 PyModuleDef_HEAD_INIT,
3910 "_elementtree",
3911 NULL,
3912 sizeof(elementtreestate),
3913 _functions,
3914 NULL,
3915 elementtree_traverse,
3916 elementtree_clear,
3917 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003918};
3919
Neal Norwitzf6657e62006-12-28 04:47:50 +00003920PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003921PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003922{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003923 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003924 elementtreestate *st;
3925
3926 m = PyState_FindModule(&elementtreemodule);
3927 if (m) {
3928 Py_INCREF(m);
3929 return m;
3930 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003931
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003932 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003933 if (PyType_Ready(&ElementIter_Type) < 0)
3934 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003935 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003936 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003937 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003938 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003939 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003940 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003941
Eli Bendersky532d03e2013-08-10 08:00:39 -07003942 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003943 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003944 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003945 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003946
Eli Bendersky828efde2012-04-05 05:40:58 +03003947 if (!(temp = PyImport_ImportModule("copy")))
3948 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003949 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003950 Py_XDECREF(temp);
3951
Eli Bendersky532d03e2013-08-10 08:00:39 -07003952 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003953 return NULL;
3954
Eli Bendersky20d41742012-06-01 09:48:37 +03003955 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003956 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3957 if (expat_capi) {
3958 /* check that it's usable */
3959 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003960 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003961 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3962 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003963 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003964 PyErr_SetString(PyExc_ImportError,
3965 "pyexpat version is incompatible");
3966 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003967 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003968 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003969 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003970 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003971
Eli Bendersky532d03e2013-08-10 08:00:39 -07003972 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003973 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003974 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003975 Py_INCREF(st->parseerror_obj);
3976 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003977
Eli Bendersky092af1f2012-03-04 07:14:03 +02003978 Py_INCREF((PyObject *)&Element_Type);
3979 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3980
Eli Bendersky58d548d2012-05-29 15:45:16 +03003981 Py_INCREF((PyObject *)&TreeBuilder_Type);
3982 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3983
Eli Bendersky52467b12012-06-01 07:13:08 +03003984 Py_INCREF((PyObject *)&XMLParser_Type);
3985 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003986
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003987 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003988}