blob: 911b5ac5a9a9fc1e613b91856d2b2038103b2e9d [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in an element's extra block;
   an element with at most this many children needs no separate
   allocation for its child array. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Optional allocation tracing.  With the condition below switched to 1,
   ALLOC and RELEASE keep a running byte total in `memory` and print it
   together with the given comment; as shipped (condition 0) both macros
   expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif

/* compiler tweaks: LOCAL(type) introduces a file-private function that
   returns `type`.  MSVC builds additionally mark it __inline and
   __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The lowest bit of the `text` and `tail` pointers is used as a 'join'
   flag, so every use of those fields as object pointers must go through
   JOIN_OBJ.  See the comments in the ElementObject definition for what
   the flag means.

     JOIN_OBJ(p)        p with the flag bit cleared, as a PyObject*
     JOIN_GET(p)        the flag bit of p (0 or 1)
     JOIN_SET(p, flag)  p with its flag bit replaced by `flag` (void*) */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
Christian Heimes90aa7642007-12-19 02:45:37 +0000225#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700641 _clear_joined_ptr(&self->text);
642 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643
644 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300645 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 return 0;
649}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655
656 if (self->weakreflist != NULL)
657 PyObject_ClearWeakRefs((PyObject *) self);
658
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 /* element_gc_clear clears all references and deallocates extra
660 */
661 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
663 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200664 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665}
666
667/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669/*[clinic input]
670_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
Serhiy Storchakacb985562015-05-04 15:32:48 +0300672 subelement: object(subclass_of='&Element_Type')
673 /
674
675[clinic start generated code]*/
676
677static PyObject *
678_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
679/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
680{
681 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 return NULL;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element_clear_impl(ElementObject *self)
694/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
695{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300696 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_INCREF(Py_None);
699 Py_DECREF(JOIN_OBJ(self->text));
700 self->text = Py_None;
701
702 Py_INCREF(Py_None);
703 Py_DECREF(JOIN_OBJ(self->tail));
704 self->tail = Py_None;
705
706 Py_RETURN_NONE;
707}
708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709/*[clinic input]
710_elementtree.Element.__copy__
711
712[clinic start generated code]*/
713
714static PyObject *
715_elementtree_Element___copy___impl(ElementObject *self)
716/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200718 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 ElementObject* element;
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800722 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (!element)
724 return NULL;
725
726 Py_DECREF(JOIN_OBJ(element->text));
727 element->text = self->text;
728 Py_INCREF(JOIN_OBJ(element->text));
729
730 Py_DECREF(JOIN_OBJ(element->tail));
731 element->tail = self->tail;
732 Py_INCREF(JOIN_OBJ(element->tail));
733
734 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000735 if (element_resize(element, self->extra->length) < 0) {
736 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000738 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
740 for (i = 0; i < self->extra->length; i++) {
741 Py_INCREF(self->extra->children[i]);
742 element->extra->children[i] = self->extra->children[i];
743 }
744
745 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 }
747
748 return (PyObject*) element;
749}
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
754 memo: object
755 /
756
757[clinic start generated code]*/
758
759static PyObject *
760_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 Py_DECREF(element->text);
798 element->text = JOIN_SET(text, JOIN_GET(self->text));
799
800 tail = deepcopy(JOIN_OBJ(self->tail), memo);
801 if (!tail)
802 goto error;
803 Py_DECREF(element->tail);
804 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
805
806 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 if (element_resize(element, self->extra->length) < 0)
808 goto error;
809
810 for (i = 0; i < self->extra->length; i++) {
811 PyObject* child = deepcopy(self->extra->children[i], memo);
812 if (!child) {
813 element->extra->length = i;
814 goto error;
815 }
816 element->extra->children[i] = child;
817 }
818
819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200823 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchakacb985562015-05-04 15:32:48 +0300841/*[clinic input]
842_elementtree.Element.__sizeof__ -> Py_ssize_t
843
844[clinic start generated code]*/
845
846static Py_ssize_t
847_elementtree_Element___sizeof___impl(ElementObject *self)
848/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200849{
Martin v. Löwisbce16662012-06-17 10:41:22 +0200850 Py_ssize_t result = sizeof(ElementObject);
851 if (self->extra) {
852 result += sizeof(ElementObjectExtra);
853 if (self->extra->children != self->extra->_children)
854 result += sizeof(PyObject*) * self->extra->allocated;
855 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300856 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200857}
858
Eli Bendersky698bdb22013-01-10 06:01:06 -0800859/* dict keys for getstate/setstate. */
860#define PICKLED_TAG "tag"
861#define PICKLED_CHILDREN "_children"
862#define PICKLED_ATTRIB "attrib"
863#define PICKLED_TAIL "tail"
864#define PICKLED_TEXT "text"
865
866/* __getstate__ returns a fabricated instance dict as in the pure-Python
867 * Element implementation, for interoperability/interchangeability. This
868 * makes the pure-Python implementation details an API, but (a) there aren't
869 * any unnecessary structures there; and (b) it buys compatibility with 3.2
870 * pickles. See issue #16076.
871 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300872/*[clinic input]
873_elementtree.Element.__getstate__
874
875[clinic start generated code]*/
876
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300878_elementtree_Element___getstate___impl(ElementObject *self)
879/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200881 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 PyObject *instancedict = NULL, *children;
883
884 /* Build a list of children. */
885 children = PyList_New(self->extra ? self->extra->length : 0);
886 if (!children)
887 return NULL;
888 for (i = 0; i < PyList_GET_SIZE(children); i++) {
889 PyObject *child = self->extra->children[i];
890 Py_INCREF(child);
891 PyList_SET_ITEM(children, i, child);
892 }
893
894 /* Construct the state object. */
895 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
896 if (noattrib)
897 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
898 PICKLED_TAG, self->tag,
899 PICKLED_CHILDREN, children,
900 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 PICKLED_TEXT, JOIN_OBJ(self->text),
902 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903 else
904 instancedict = Py_BuildValue("{sOsOsOsOsO}",
905 PICKLED_TAG, self->tag,
906 PICKLED_CHILDREN, children,
907 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 PICKLED_TEXT, JOIN_OBJ(self->text),
909 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800910 if (instancedict) {
911 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800913 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 else {
915 for (i = 0; i < PyList_GET_SIZE(children); i++)
916 Py_DECREF(PyList_GET_ITEM(children, i));
917 Py_DECREF(children);
918
919 return NULL;
920 }
921}
922
923static PyObject *
924element_setstate_from_attributes(ElementObject *self,
925 PyObject *tag,
926 PyObject *attrib,
927 PyObject *text,
928 PyObject *tail,
929 PyObject *children)
930{
931 Py_ssize_t i, nchildren;
932
933 if (!tag) {
934 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
935 return NULL;
936 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937
938 Py_CLEAR(self->tag);
939 self->tag = tag;
940 Py_INCREF(self->tag);
941
Eli Benderskydd3661e2013-09-13 06:24:25 -0700942 _clear_joined_ptr(&self->text);
943 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
944 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945
Eli Benderskydd3661e2013-09-13 06:24:25 -0700946 _clear_joined_ptr(&self->tail);
947 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
948 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949
950 /* Handle ATTRIB and CHILDREN. */
951 if (!children && !attrib)
952 Py_RETURN_NONE;
953
954 /* Compute 'nchildren'. */
955 if (children) {
956 if (!PyList_Check(children)) {
957 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
958 return NULL;
959 }
960 nchildren = PyList_Size(children);
961 }
962 else {
963 nchildren = 0;
964 }
965
966 /* Allocate 'extra'. */
967 if (element_resize(self, nchildren)) {
968 return NULL;
969 }
970 assert(self->extra && self->extra->allocated >= nchildren);
971
972 /* Copy children */
973 for (i = 0; i < nchildren; i++) {
974 self->extra->children[i] = PyList_GET_ITEM(children, i);
975 Py_INCREF(self->extra->children[i]);
976 }
977
978 self->extra->length = nchildren;
979 self->extra->allocated = nchildren;
980
981 /* Stash attrib. */
982 if (attrib) {
983 Py_CLEAR(self->extra->attrib);
984 self->extra->attrib = attrib;
985 Py_INCREF(attrib);
986 }
987
988 Py_RETURN_NONE;
989}
990
991/* __setstate__ for Element instance from the Python implementation.
992 * 'state' should be the instance dict.
993 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300994
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995static PyObject *
996element_setstate_from_Python(ElementObject *self, PyObject *state)
997{
998 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
999 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1000 PyObject *args;
1001 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001002 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004 tag = attrib = text = tail = children = NULL;
1005 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001008
1009 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1010 &attrib, &text, &tail, &children))
1011 retval = element_setstate_from_attributes(self, tag, attrib, text,
1012 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001014 retval = NULL;
1015
1016 Py_DECREF(args);
1017 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001018}
1019
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020/*[clinic input]
1021_elementtree.Element.__setstate__
1022
1023 state: object
1024 /
1025
1026[clinic start generated code]*/
1027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001029_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1030/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031{
1032 if (!PyDict_CheckExact(state)) {
1033 PyErr_Format(PyExc_TypeError,
1034 "Don't know how to unpickle \"%.200R\" as an Element",
1035 state);
1036 return NULL;
1037 }
1038 else
1039 return element_setstate_from_Python(self, state);
1040}
1041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001042LOCAL(int)
1043checkpath(PyObject* tag)
1044{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001045 Py_ssize_t i;
1046 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047
1048 /* check if a tag contains an xpath character */
1049
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050#define PATHCHAR(ch) \
1051 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1055 void *data = PyUnicode_DATA(tag);
1056 unsigned int kind = PyUnicode_KIND(tag);
1057 for (i = 0; i < len; i++) {
1058 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1059 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 return 1;
1065 }
1066 return 0;
1067 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001068 if (PyBytes_Check(tag)) {
1069 char *p = PyBytes_AS_STRING(tag);
1070 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071 if (p[i] == '{')
1072 check = 0;
1073 else if (p[i] == '}')
1074 check = 1;
1075 else if (check && PATHCHAR(p[i]))
1076 return 1;
1077 }
1078 return 0;
1079 }
1080
1081 return 1; /* unknown type; might be path expression */
1082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.extend
1086
1087 elements: object
1088 /
1089
1090[clinic start generated code]*/
1091
1092static PyObject *
1093_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1094/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095{
1096 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001097 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098
Serhiy Storchakacb985562015-05-04 15:32:48 +03001099 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001100 if (!seq) {
1101 PyErr_Format(
1102 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001104 );
1105 return NULL;
1106 }
1107
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001108 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001109 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001110 Py_INCREF(element);
1111 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001112 PyErr_Format(
1113 PyExc_TypeError,
1114 "expected an Element, not \"%.200s\"",
1115 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001116 Py_DECREF(seq);
1117 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001118 return NULL;
1119 }
1120
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (element_add_subelement(self, element) < 0) {
1122 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 return NULL;
1125 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001126 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 }
1128
1129 Py_DECREF(seq);
1130
1131 Py_RETURN_NONE;
1132}
1133
Serhiy Storchakacb985562015-05-04 15:32:48 +03001134/*[clinic input]
1135_elementtree.Element.find
1136
1137 path: object
1138 namespaces: object = None
1139
1140[clinic start generated code]*/
1141
1142static PyObject *
1143_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1144 PyObject *namespaces)
1145/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001147 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001148 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001149
Serhiy Storchakacb985562015-05-04 15:32:48 +03001150 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001151 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001153 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001155 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156
1157 if (!self->extra)
1158 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 for (i = 0; i < self->extra->length; i++) {
1161 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001162 int rc;
1163 if (!Element_CheckExact(item))
1164 continue;
1165 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001166 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 Py_DECREF(item);
1170 if (rc < 0)
1171 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 }
1173
1174 Py_RETURN_NONE;
1175}
1176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177/*[clinic input]
1178_elementtree.Element.findtext
1179
1180 path: object
1181 default: object = None
1182 namespaces: object = None
1183
1184[clinic start generated code]*/
1185
1186static PyObject *
1187_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1188 PyObject *default_value,
1189 PyObject *namespaces)
1190/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001192 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001193 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001194 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001197 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199 );
1200
1201 if (!self->extra) {
1202 Py_INCREF(default_value);
1203 return default_value;
1204 }
1205
1206 for (i = 0; i < self->extra->length; i++) {
1207 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001208 int rc;
1209 if (!Element_CheckExact(item))
1210 continue;
1211 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001212 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001213 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 if (text == Py_None) {
1216 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001217 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001219 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001220 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 return text;
1222 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 Py_DECREF(item);
1224 if (rc < 0)
1225 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 }
1227
1228 Py_INCREF(default_value);
1229 return default_value;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.findall
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001247 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001248 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001249
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001250 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001251 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001252 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001253 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001255 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256
1257 out = PyList_New(0);
1258 if (!out)
1259 return NULL;
1260
1261 if (!self->extra)
1262 return out;
1263
1264 for (i = 0; i < self->extra->length; i++) {
1265 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 int rc;
1267 if (!Element_CheckExact(item))
1268 continue;
1269 Py_INCREF(item);
1270 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1271 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1272 Py_DECREF(item);
1273 Py_DECREF(out);
1274 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001276 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 }
1278
1279 return out;
1280}
1281
Serhiy Storchakacb985562015-05-04 15:32:48 +03001282/*[clinic input]
1283_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001284
Serhiy Storchakacb985562015-05-04 15:32:48 +03001285 path: object
1286 namespaces: object = None
1287
1288[clinic start generated code]*/
1289
1290static PyObject *
1291_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1292 PyObject *namespaces)
1293/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1294{
1295 PyObject* tag = path;
1296 _Py_IDENTIFIER(iterfind);
1297 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001298
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001299 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001300 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001301}
1302
Serhiy Storchakacb985562015-05-04 15:32:48 +03001303/*[clinic input]
1304_elementtree.Element.get
1305
1306 key: object
1307 default: object = None
1308
1309[clinic start generated code]*/
1310
1311static PyObject *
1312_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1313 PyObject *default_value)
1314/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315{
1316 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317
1318 if (!self->extra || self->extra->attrib == Py_None)
1319 value = default_value;
1320 else {
1321 value = PyDict_GetItem(self->extra->attrib, key);
1322 if (!value)
1323 value = default_value;
1324 }
1325
1326 Py_INCREF(value);
1327 return value;
1328}
1329
Serhiy Storchakacb985562015-05-04 15:32:48 +03001330/*[clinic input]
1331_elementtree.Element.getchildren
1332
1333[clinic start generated code]*/
1334
1335static PyObject *
1336_elementtree_Element_getchildren_impl(ElementObject *self)
1337/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001339 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340 PyObject* list;
1341
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001342 /* FIXME: report as deprecated? */
1343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344 if (!self->extra)
1345 return PyList_New(0);
1346
1347 list = PyList_New(self->extra->length);
1348 if (!list)
1349 return NULL;
1350
1351 for (i = 0; i < self->extra->length; i++) {
1352 PyObject* item = self->extra->children[i];
1353 Py_INCREF(item);
1354 PyList_SET_ITEM(list, i, item);
1355 }
1356
1357 return list;
1358}
1359
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001360
Eli Bendersky64d11e62012-06-15 07:42:50 +03001361static PyObject *
1362create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1363
1364
Serhiy Storchakacb985562015-05-04 15:32:48 +03001365/*[clinic input]
1366_elementtree.Element.iter
1367
1368 tag: object = None
1369
1370[clinic start generated code]*/
1371
Eli Bendersky64d11e62012-06-15 07:42:50 +03001372static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001373_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1374/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001375{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001376 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001377}
1378
1379
Serhiy Storchakacb985562015-05-04 15:32:48 +03001380/*[clinic input]
1381_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001382
Serhiy Storchakacb985562015-05-04 15:32:48 +03001383[clinic start generated code]*/
1384
1385static PyObject *
1386_elementtree_Element_itertext_impl(ElementObject *self)
1387/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1388{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001389 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001390}
1391
Eli Bendersky64d11e62012-06-15 07:42:50 +03001392
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001393static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001394element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001395{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001396 ElementObject* self = (ElementObject*) self_;
1397
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001398 if (!self->extra || index < 0 || index >= self->extra->length) {
1399 PyErr_SetString(
1400 PyExc_IndexError,
1401 "child index out of range"
1402 );
1403 return NULL;
1404 }
1405
1406 Py_INCREF(self->extra->children[index]);
1407 return self->extra->children[index];
1408}
1409
Serhiy Storchakacb985562015-05-04 15:32:48 +03001410/*[clinic input]
1411_elementtree.Element.insert
1412
1413 index: Py_ssize_t
1414 subelement: object(subclass_of='&Element_Type')
1415 /
1416
1417[clinic start generated code]*/
1418
1419static PyObject *
1420_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1421 PyObject *subelement)
1422/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001423{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001424 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001425
Victor Stinner5f0af232013-07-11 23:01:36 +02001426 if (!self->extra) {
1427 if (create_extra(self, NULL) < 0)
1428 return NULL;
1429 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001430
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001431 if (index < 0) {
1432 index += self->extra->length;
1433 if (index < 0)
1434 index = 0;
1435 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001436 if (index > self->extra->length)
1437 index = self->extra->length;
1438
1439 if (element_resize(self, 1) < 0)
1440 return NULL;
1441
1442 for (i = self->extra->length; i > index; i--)
1443 self->extra->children[i] = self->extra->children[i-1];
1444
Serhiy Storchakacb985562015-05-04 15:32:48 +03001445 Py_INCREF(subelement);
1446 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001447
1448 self->extra->length++;
1449
1450 Py_RETURN_NONE;
1451}
1452
Serhiy Storchakacb985562015-05-04 15:32:48 +03001453/*[clinic input]
1454_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001455
Serhiy Storchakacb985562015-05-04 15:32:48 +03001456[clinic start generated code]*/
1457
1458static PyObject *
1459_elementtree_Element_items_impl(ElementObject *self)
1460/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1461{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001462 if (!self->extra || self->extra->attrib == Py_None)
1463 return PyList_New(0);
1464
1465 return PyDict_Items(self->extra->attrib);
1466}
1467
Serhiy Storchakacb985562015-05-04 15:32:48 +03001468/*[clinic input]
1469_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001470
Serhiy Storchakacb985562015-05-04 15:32:48 +03001471[clinic start generated code]*/
1472
1473static PyObject *
1474_elementtree_Element_keys_impl(ElementObject *self)
1475/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1476{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001477 if (!self->extra || self->extra->attrib == Py_None)
1478 return PyList_New(0);
1479
1480 return PyDict_Keys(self->extra->attrib);
1481}
1482
Martin v. Löwis18e16552006-02-15 17:27:45 +00001483static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484element_length(ElementObject* self)
1485{
1486 if (!self->extra)
1487 return 0;
1488
1489 return self->extra->length;
1490}
1491
Serhiy Storchakacb985562015-05-04 15:32:48 +03001492/*[clinic input]
1493_elementtree.Element.makeelement
1494
1495 tag: object
1496 attrib: object
1497 /
1498
1499[clinic start generated code]*/
1500
1501static PyObject *
1502_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1503 PyObject *attrib)
1504/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001505{
1506 PyObject* elem;
1507
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001508 attrib = PyDict_Copy(attrib);
1509 if (!attrib)
1510 return NULL;
1511
Eli Bendersky092af1f2012-03-04 07:14:03 +02001512 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001513
1514 Py_DECREF(attrib);
1515
1516 return elem;
1517}
1518
Serhiy Storchakacb985562015-05-04 15:32:48 +03001519/*[clinic input]
1520_elementtree.Element.remove
1521
1522 subelement: object(subclass_of='&Element_Type')
1523 /
1524
1525[clinic start generated code]*/
1526
1527static PyObject *
1528_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1529/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001530{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001531 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001532 int rc;
1533 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001534
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535 if (!self->extra) {
1536 /* element has no children, so raise exception */
1537 PyErr_SetString(
1538 PyExc_ValueError,
1539 "list.remove(x): x not in list"
1540 );
1541 return NULL;
1542 }
1543
1544 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001545 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001547 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001548 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001549 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001550 if (rc < 0)
1551 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001552 }
1553
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001554 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001555 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001556 PyErr_SetString(
1557 PyExc_ValueError,
1558 "list.remove(x): x not in list"
1559 );
1560 return NULL;
1561 }
1562
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001563 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001564
1565 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001566 for (; i < self->extra->length; i++)
1567 self->extra->children[i] = self->extra->children[i+1];
1568
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001569 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001570 Py_RETURN_NONE;
1571}
1572
1573static PyObject*
1574element_repr(ElementObject* self)
1575{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001576 if (self->tag)
1577 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1578 else
1579 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001580}
1581
Serhiy Storchakacb985562015-05-04 15:32:48 +03001582/*[clinic input]
1583_elementtree.Element.set
1584
1585 key: object
1586 value: object
1587 /
1588
1589[clinic start generated code]*/
1590
1591static PyObject *
1592_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1593 PyObject *value)
1594/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001595{
1596 PyObject* attrib;
1597
Victor Stinner5f0af232013-07-11 23:01:36 +02001598 if (!self->extra) {
1599 if (create_extra(self, NULL) < 0)
1600 return NULL;
1601 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001602
1603 attrib = element_get_attrib(self);
1604 if (!attrib)
1605 return NULL;
1606
1607 if (PyDict_SetItem(attrib, key, value) < 0)
1608 return NULL;
1609
1610 Py_RETURN_NONE;
1611}
1612
1613static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001614element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001615{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001616 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001617 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001618 PyObject* old;
1619
1620 if (!self->extra || index < 0 || index >= self->extra->length) {
1621 PyErr_SetString(
1622 PyExc_IndexError,
1623 "child assignment index out of range");
1624 return -1;
1625 }
1626
1627 old = self->extra->children[index];
1628
1629 if (item) {
1630 Py_INCREF(item);
1631 self->extra->children[index] = item;
1632 } else {
1633 self->extra->length--;
1634 for (i = index; i < self->extra->length; i++)
1635 self->extra->children[i] = self->extra->children[i+1];
1636 }
1637
1638 Py_DECREF(old);
1639
1640 return 0;
1641}
1642
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001643static PyObject*
1644element_subscr(PyObject* self_, PyObject* item)
1645{
1646 ElementObject* self = (ElementObject*) self_;
1647
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001648 if (PyIndex_Check(item)) {
1649 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001650
1651 if (i == -1 && PyErr_Occurred()) {
1652 return NULL;
1653 }
1654 if (i < 0 && self->extra)
1655 i += self->extra->length;
1656 return element_getitem(self_, i);
1657 }
1658 else if (PySlice_Check(item)) {
1659 Py_ssize_t start, stop, step, slicelen, cur, i;
1660 PyObject* list;
1661
1662 if (!self->extra)
1663 return PyList_New(0);
1664
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001665 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001666 self->extra->length,
1667 &start, &stop, &step, &slicelen) < 0) {
1668 return NULL;
1669 }
1670
1671 if (slicelen <= 0)
1672 return PyList_New(0);
1673 else {
1674 list = PyList_New(slicelen);
1675 if (!list)
1676 return NULL;
1677
1678 for (cur = start, i = 0; i < slicelen;
1679 cur += step, i++) {
1680 PyObject* item = self->extra->children[cur];
1681 Py_INCREF(item);
1682 PyList_SET_ITEM(list, i, item);
1683 }
1684
1685 return list;
1686 }
1687 }
1688 else {
1689 PyErr_SetString(PyExc_TypeError,
1690 "element indices must be integers");
1691 return NULL;
1692 }
1693}
1694
1695static int
1696element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1697{
1698 ElementObject* self = (ElementObject*) self_;
1699
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001700 if (PyIndex_Check(item)) {
1701 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001702
1703 if (i == -1 && PyErr_Occurred()) {
1704 return -1;
1705 }
1706 if (i < 0 && self->extra)
1707 i += self->extra->length;
1708 return element_setitem(self_, i, value);
1709 }
1710 else if (PySlice_Check(item)) {
1711 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1712
1713 PyObject* recycle = NULL;
1714 PyObject* seq = NULL;
1715
Victor Stinner5f0af232013-07-11 23:01:36 +02001716 if (!self->extra) {
1717 if (create_extra(self, NULL) < 0)
1718 return -1;
1719 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001720
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001721 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001722 self->extra->length,
1723 &start, &stop, &step, &slicelen) < 0) {
1724 return -1;
1725 }
1726
Eli Bendersky865756a2012-03-09 13:38:15 +02001727 if (value == NULL) {
1728 /* Delete slice */
1729 size_t cur;
1730 Py_ssize_t i;
1731
1732 if (slicelen <= 0)
1733 return 0;
1734
1735 /* Since we're deleting, the direction of the range doesn't matter,
1736 * so for simplicity make it always ascending.
1737 */
1738 if (step < 0) {
1739 stop = start + 1;
1740 start = stop + step * (slicelen - 1) - 1;
1741 step = -step;
1742 }
1743
1744 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1745
1746 /* recycle is a list that will contain all the children
1747 * scheduled for removal.
1748 */
1749 if (!(recycle = PyList_New(slicelen))) {
1750 PyErr_NoMemory();
1751 return -1;
1752 }
1753
1754 /* This loop walks over all the children that have to be deleted,
1755 * with cur pointing at them. num_moved is the amount of children
1756 * until the next deleted child that have to be "shifted down" to
1757 * occupy the deleted's places.
1758 * Note that in the ith iteration, shifting is done i+i places down
1759 * because i children were already removed.
1760 */
1761 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1762 /* Compute how many children have to be moved, clipping at the
1763 * list end.
1764 */
1765 Py_ssize_t num_moved = step - 1;
1766 if (cur + step >= (size_t)self->extra->length) {
1767 num_moved = self->extra->length - cur - 1;
1768 }
1769
1770 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1771
1772 memmove(
1773 self->extra->children + cur - i,
1774 self->extra->children + cur + 1,
1775 num_moved * sizeof(PyObject *));
1776 }
1777
1778 /* Leftover "tail" after the last removed child */
1779 cur = start + (size_t)slicelen * step;
1780 if (cur < (size_t)self->extra->length) {
1781 memmove(
1782 self->extra->children + cur - slicelen,
1783 self->extra->children + cur,
1784 (self->extra->length - cur) * sizeof(PyObject *));
1785 }
1786
1787 self->extra->length -= slicelen;
1788
1789 /* Discard the recycle list with all the deleted sub-elements */
1790 Py_XDECREF(recycle);
1791 return 0;
1792 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001793 else {
Eli Bendersky865756a2012-03-09 13:38:15 +02001794 /* A new slice is actually being assigned */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001795 seq = PySequence_Fast(value, "");
1796 if (!seq) {
1797 PyErr_Format(
1798 PyExc_TypeError,
1799 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1800 );
1801 return -1;
1802 }
1803 newlen = PySequence_Size(seq);
1804 }
1805
1806 if (step != 1 && newlen != slicelen)
1807 {
1808 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001809 "attempt to assign sequence of size %zd "
1810 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001811 newlen, slicelen
1812 );
1813 return -1;
1814 }
1815
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001816 /* Resize before creating the recycle bin, to prevent refleaks. */
1817 if (newlen > slicelen) {
1818 if (element_resize(self, newlen - slicelen) < 0) {
1819 if (seq) {
1820 Py_DECREF(seq);
1821 }
1822 return -1;
1823 }
1824 }
1825
1826 if (slicelen > 0) {
1827 /* to avoid recursive calls to this method (via decref), move
1828 old items to the recycle bin here, and get rid of them when
1829 we're done modifying the element */
1830 recycle = PyList_New(slicelen);
1831 if (!recycle) {
1832 if (seq) {
1833 Py_DECREF(seq);
1834 }
1835 return -1;
1836 }
1837 for (cur = start, i = 0; i < slicelen;
1838 cur += step, i++)
1839 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1840 }
1841
1842 if (newlen < slicelen) {
1843 /* delete slice */
1844 for (i = stop; i < self->extra->length; i++)
1845 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1846 } else if (newlen > slicelen) {
1847 /* insert slice */
1848 for (i = self->extra->length-1; i >= stop; i--)
1849 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1850 }
1851
1852 /* replace the slice */
1853 for (cur = start, i = 0; i < newlen;
1854 cur += step, i++) {
1855 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1856 Py_INCREF(element);
1857 self->extra->children[cur] = element;
1858 }
1859
1860 self->extra->length += newlen - slicelen;
1861
1862 if (seq) {
1863 Py_DECREF(seq);
1864 }
1865
1866 /* discard the recycle bin, and everything in it */
1867 Py_XDECREF(recycle);
1868
1869 return 0;
1870 }
1871 else {
1872 PyErr_SetString(PyExc_TypeError,
1873 "element indices must be integers");
1874 return -1;
1875 }
1876}
1877
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001878static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001879element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001880{
1881 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001882 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001883
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001884 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001885 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001886
Alexander Belopolskye239d232010-12-08 23:31:48 +00001887 if (name == NULL)
1888 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001889
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001890 /* handle common attributes first */
1891 if (strcmp(name, "tag") == 0) {
1892 res = self->tag;
1893 Py_INCREF(res);
1894 return res;
1895 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001896 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001897 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001898 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001899 }
1900
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001901 /* methods */
1902 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1903 if (res)
1904 return res;
1905
1906 /* less common attributes */
1907 if (strcmp(name, "tail") == 0) {
1908 PyErr_Clear();
1909 res = element_get_tail(self);
1910 } else if (strcmp(name, "attrib") == 0) {
1911 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001912 if (!self->extra) {
1913 if (create_extra(self, NULL) < 0)
1914 return NULL;
1915 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001916 res = element_get_attrib(self);
1917 }
1918
1919 if (!res)
1920 return NULL;
1921
1922 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001923 return res;
1924}
1925
Eli Benderskyef9683b2013-05-18 07:52:34 -07001926static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001927element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001928{
Eli Benderskyb20df952012-05-20 06:33:29 +03001929 char *name = "";
1930 if (PyUnicode_Check(nameobj))
1931 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001932 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001933 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001934
1935 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001936 Py_DECREF(self->tag);
1937 self->tag = value;
1938 Py_INCREF(self->tag);
1939 } else if (strcmp(name, "text") == 0) {
1940 Py_DECREF(JOIN_OBJ(self->text));
1941 self->text = value;
1942 Py_INCREF(self->text);
1943 } else if (strcmp(name, "tail") == 0) {
1944 Py_DECREF(JOIN_OBJ(self->tail));
1945 self->tail = value;
1946 Py_INCREF(self->tail);
1947 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001948 if (!self->extra) {
1949 if (create_extra(self, NULL) < 0)
1950 return -1;
1951 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001952 Py_DECREF(self->extra->attrib);
1953 self->extra->attrib = value;
1954 Py_INCREF(self->extra->attrib);
1955 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001956 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001957 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001958 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001959 }
1960
Eli Benderskyef9683b2013-05-18 07:52:34 -07001961 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001962}
1963
1964static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001965 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001966 0, /* sq_concat */
1967 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001968 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001969 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001970 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001971 0,
1972};
1973
Eli Bendersky64d11e62012-06-15 07:42:50 +03001974/******************************* Element iterator ****************************/
1975
1976/* ElementIterObject represents the iteration state over an XML element in
1977 * pre-order traversal. To keep track of which sub-element should be returned
1978 * next, a stack of parents is maintained. This is a standard stack-based
1979 * iterative pre-order traversal of a tree.
1980 * The stack is managed using a single-linked list starting at parent_stack.
1981 * Each stack node contains the saved parent to which we should return after
1982 * the current one is exhausted, and the next child to examine in that parent.
1983 */
1984typedef struct ParentLocator_t {
1985 ElementObject *parent;
1986 Py_ssize_t child_index;
1987 struct ParentLocator_t *next;
1988} ParentLocator;
1989
1990typedef struct {
1991 PyObject_HEAD
1992 ParentLocator *parent_stack;
1993 ElementObject *root_element;
1994 PyObject *sought_tag;
1995 int root_done;
1996 int gettext;
1997} ElementIterObject;
1998
1999
2000static void
2001elementiter_dealloc(ElementIterObject *it)
2002{
2003 ParentLocator *p = it->parent_stack;
2004 while (p) {
2005 ParentLocator *temp = p;
2006 Py_XDECREF(p->parent);
2007 p = p->next;
2008 PyObject_Free(temp);
2009 }
2010
2011 Py_XDECREF(it->sought_tag);
2012 Py_XDECREF(it->root_element);
2013
2014 PyObject_GC_UnTrack(it);
2015 PyObject_GC_Del(it);
2016}
2017
2018static int
2019elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2020{
2021 ParentLocator *p = it->parent_stack;
2022 while (p) {
2023 Py_VISIT(p->parent);
2024 p = p->next;
2025 }
2026
2027 Py_VISIT(it->root_element);
2028 Py_VISIT(it->sought_tag);
2029 return 0;
2030}
2031
2032/* Helper function for elementiter_next. Add a new parent to the parent stack.
2033 */
2034static ParentLocator *
2035parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2036{
2037 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2038 if (new_node) {
2039 new_node->parent = parent;
2040 Py_INCREF(parent);
2041 new_node->child_index = 0;
2042 new_node->next = stack;
2043 }
2044 return new_node;
2045}
2046
2047static PyObject *
2048elementiter_next(ElementIterObject *it)
2049{
2050 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002051 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002052 * A short note on gettext: this function serves both the iter() and
2053 * itertext() methods to avoid code duplication. However, there are a few
2054 * small differences in the way these iterations work. Namely:
2055 * - itertext() only yields text from nodes that have it, and continues
2056 * iterating when a node doesn't have text (so it doesn't return any
2057 * node like iter())
2058 * - itertext() also has to handle tail, after finishing with all the
2059 * children of a node.
2060 */
Eli Bendersky113da642012-06-15 07:52:49 +03002061 ElementObject *cur_parent;
2062 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002063 int rc;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002064
2065 while (1) {
2066 /* Handle the case reached in the beginning and end of iteration, where
2067 * the parent stack is empty. The root_done flag gives us indication
2068 * whether we've just started iterating (so root_done is 0), in which
2069 * case the root is returned. If root_done is 1 and we're here, the
2070 * iterator is exhausted.
2071 */
2072 if (!it->parent_stack->parent) {
2073 if (it->root_done) {
2074 PyErr_SetNone(PyExc_StopIteration);
2075 return NULL;
2076 } else {
2077 it->parent_stack = parent_stack_push_new(it->parent_stack,
2078 it->root_element);
2079 if (!it->parent_stack) {
2080 PyErr_NoMemory();
2081 return NULL;
2082 }
2083
2084 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002085 rc = (it->sought_tag == Py_None);
2086 if (!rc) {
2087 rc = PyObject_RichCompareBool(it->root_element->tag,
2088 it->sought_tag, Py_EQ);
2089 if (rc < 0)
2090 return NULL;
2091 }
2092 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002093 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002094 PyObject *text = element_get_text(it->root_element);
2095 if (!text)
2096 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002097 rc = PyObject_IsTrue(text);
2098 if (rc < 0)
2099 return NULL;
2100 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002101 Py_INCREF(text);
2102 return text;
2103 }
2104 } else {
2105 Py_INCREF(it->root_element);
2106 return (PyObject *)it->root_element;
2107 }
2108 }
2109 }
2110 }
2111
2112 /* See if there are children left to traverse in the current parent. If
2113 * yes, visit the next child. If not, pop the stack and try again.
2114 */
Eli Bendersky113da642012-06-15 07:52:49 +03002115 cur_parent = it->parent_stack->parent;
2116 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002117 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2118 ElementObject *child = (ElementObject *)
2119 cur_parent->extra->children[child_index];
2120 it->parent_stack->child_index++;
2121 it->parent_stack = parent_stack_push_new(it->parent_stack,
2122 child);
2123 if (!it->parent_stack) {
2124 PyErr_NoMemory();
2125 return NULL;
2126 }
2127
2128 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002129 PyObject *text = element_get_text(child);
2130 if (!text)
2131 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002132 rc = PyObject_IsTrue(text);
2133 if (rc < 0)
2134 return NULL;
2135 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002136 Py_INCREF(text);
2137 return text;
2138 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002139 } else {
2140 rc = (it->sought_tag == Py_None);
2141 if (!rc) {
2142 rc = PyObject_RichCompareBool(child->tag,
2143 it->sought_tag, Py_EQ);
2144 if (rc < 0)
2145 return NULL;
2146 }
2147 if (rc) {
2148 Py_INCREF(child);
2149 return (PyObject *)child;
2150 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002151 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152 }
2153 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002154 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002155 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002156 if (it->gettext) {
2157 tail = element_get_tail(cur_parent);
2158 if (!tail)
2159 return NULL;
2160 }
2161 else
2162 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002163 Py_XDECREF(it->parent_stack->parent);
2164 PyObject_Free(it->parent_stack);
2165 it->parent_stack = next;
2166
2167 /* Note that extra condition on it->parent_stack->parent here;
2168 * this is because itertext() is supposed to only return *inner*
2169 * text, not text following the element it began iteration with.
2170 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002171 if (it->parent_stack->parent) {
2172 rc = PyObject_IsTrue(tail);
2173 if (rc < 0)
2174 return NULL;
2175 if (rc) {
2176 Py_INCREF(tail);
2177 return tail;
2178 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002179 }
2180 }
2181 }
2182
2183 return NULL;
2184}
2185
2186
2187static PyTypeObject ElementIter_Type = {
2188 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002189 /* Using the module's name since the pure-Python implementation does not
2190 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002191 "_elementtree._element_iterator", /* tp_name */
2192 sizeof(ElementIterObject), /* tp_basicsize */
2193 0, /* tp_itemsize */
2194 /* methods */
2195 (destructor)elementiter_dealloc, /* tp_dealloc */
2196 0, /* tp_print */
2197 0, /* tp_getattr */
2198 0, /* tp_setattr */
2199 0, /* tp_reserved */
2200 0, /* tp_repr */
2201 0, /* tp_as_number */
2202 0, /* tp_as_sequence */
2203 0, /* tp_as_mapping */
2204 0, /* tp_hash */
2205 0, /* tp_call */
2206 0, /* tp_str */
2207 0, /* tp_getattro */
2208 0, /* tp_setattro */
2209 0, /* tp_as_buffer */
2210 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2211 0, /* tp_doc */
2212 (traverseproc)elementiter_traverse, /* tp_traverse */
2213 0, /* tp_clear */
2214 0, /* tp_richcompare */
2215 0, /* tp_weaklistoffset */
2216 PyObject_SelfIter, /* tp_iter */
2217 (iternextfunc)elementiter_next, /* tp_iternext */
2218 0, /* tp_methods */
2219 0, /* tp_members */
2220 0, /* tp_getset */
2221 0, /* tp_base */
2222 0, /* tp_dict */
2223 0, /* tp_descr_get */
2224 0, /* tp_descr_set */
2225 0, /* tp_dictoffset */
2226 0, /* tp_init */
2227 0, /* tp_alloc */
2228 0, /* tp_new */
2229};
2230
2231
2232static PyObject *
2233create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2234{
2235 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002236
2237 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2238 if (!it)
2239 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002240
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002241 if (PyUnicode_Check(tag)) {
2242 if (PyUnicode_READY(tag) < 0)
2243 return NULL;
2244 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
2245 tag = Py_None;
2246 }
2247 else if (PyBytes_Check(tag)) {
2248 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
2249 tag = Py_None;
2250 }
Victor Stinner4d463432013-07-11 23:05:03 +02002251
2252 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002253 it->sought_tag = tag;
2254 it->root_done = 0;
2255 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002256 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 it->root_element = self;
2258
Eli Bendersky64d11e62012-06-15 07:42:50 +03002259 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002260
2261 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2262 if (it->parent_stack == NULL) {
2263 Py_DECREF(it);
2264 PyErr_NoMemory();
2265 return NULL;
2266 }
2267 it->parent_stack->parent = NULL;
2268 it->parent_stack->child_index = 0;
2269 it->parent_stack->next = NULL;
2270
Eli Bendersky64d11e62012-06-15 07:42:50 +03002271 return (PyObject *)it;
2272}
2273
2274
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002275/* ==================================================================== */
2276/* the tree builder type */
2277
2278typedef struct {
2279 PyObject_HEAD
2280
Eli Bendersky58d548d2012-05-29 15:45:16 +03002281 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282
Antoine Pitrouee329312012-10-04 19:53:29 +02002283 PyObject *this; /* current node */
2284 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002285
Eli Bendersky58d548d2012-05-29 15:45:16 +03002286 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287
Eli Bendersky58d548d2012-05-29 15:45:16 +03002288 PyObject *stack; /* element stack */
2289 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290
Eli Bendersky48d358b2012-05-30 17:57:50 +03002291 PyObject *element_factory;
2292
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002293 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002294 PyObject *events; /* list of events, or NULL if not collecting */
2295 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2296 PyObject *end_event_obj;
2297 PyObject *start_ns_event_obj;
2298 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002299} TreeBuilderObject;
2300
/* True only when op's type is exactly TreeBuilder_Type (no subclasses). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002302
2303/* -------------------------------------------------------------------- */
2304/* constructor and destructor */
2305
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306static PyObject *
2307treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002308{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002309 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2310 if (t != NULL) {
2311 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002312
Eli Bendersky58d548d2012-05-29 15:45:16 +03002313 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002314 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002315 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002316 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002317
Eli Bendersky58d548d2012-05-29 15:45:16 +03002318 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002319 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002320 t->stack = PyList_New(20);
2321 if (!t->stack) {
2322 Py_DECREF(t->this);
2323 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002324 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002325 return NULL;
2326 }
2327 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002328
Eli Bendersky58d548d2012-05-29 15:45:16 +03002329 t->events = NULL;
2330 t->start_event_obj = t->end_event_obj = NULL;
2331 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2332 }
2333 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002334}
2335
Serhiy Storchakacb985562015-05-04 15:32:48 +03002336/*[clinic input]
2337_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002338
Serhiy Storchakacb985562015-05-04 15:32:48 +03002339 element_factory: object = NULL
2340
2341[clinic start generated code]*/
2342
2343static int
2344_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2345 PyObject *element_factory)
2346/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2347{
2348 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002349
2350 if (element_factory) {
2351 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002352 tmp = self->element_factory;
2353 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002354 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002355 }
2356
Eli Bendersky58d548d2012-05-29 15:45:16 +03002357 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002358}
2359
Eli Bendersky48d358b2012-05-30 17:57:50 +03002360static int
2361treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2362{
2363 Py_VISIT(self->root);
2364 Py_VISIT(self->this);
2365 Py_VISIT(self->last);
2366 Py_VISIT(self->data);
2367 Py_VISIT(self->stack);
2368 Py_VISIT(self->element_factory);
2369 return 0;
2370}
2371
2372static int
2373treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002374{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002375 Py_CLEAR(self->end_ns_event_obj);
2376 Py_CLEAR(self->start_ns_event_obj);
2377 Py_CLEAR(self->end_event_obj);
2378 Py_CLEAR(self->start_event_obj);
2379 Py_CLEAR(self->events);
2380 Py_CLEAR(self->stack);
2381 Py_CLEAR(self->data);
2382 Py_CLEAR(self->last);
2383 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002384 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002385 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002386 return 0;
2387}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002388
Eli Bendersky48d358b2012-05-30 17:57:50 +03002389static void
2390treebuilder_dealloc(TreeBuilderObject *self)
2391{
2392 PyObject_GC_UnTrack(self);
2393 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002394 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002395}
2396
2397/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002398/* helpers for handling of arbitrary element-like objects */
2399
2400static int
2401treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2402 PyObject **dest, _Py_Identifier *name)
2403{
2404 if (Element_CheckExact(element)) {
2405 Py_DECREF(JOIN_OBJ(*dest));
2406 *dest = JOIN_SET(data, PyList_CheckExact(data));
2407 return 0;
2408 }
2409 else {
2410 PyObject *joined = list_join(data);
2411 int r;
2412 if (joined == NULL)
2413 return -1;
2414 r = _PyObject_SetAttrId(element, name, joined);
2415 Py_DECREF(joined);
2416 return r;
2417 }
2418}
2419
2420/* These two functions steal a reference to data */
2421static int
2422treebuilder_set_element_text(PyObject *element, PyObject *data)
2423{
2424 _Py_IDENTIFIER(text);
2425 return treebuilder_set_element_text_or_tail(
2426 element, data, &((ElementObject *) element)->text, &PyId_text);
2427}
2428
2429static int
2430treebuilder_set_element_tail(PyObject *element, PyObject *data)
2431{
2432 _Py_IDENTIFIER(tail);
2433 return treebuilder_set_element_text_or_tail(
2434 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2435}
2436
2437static int
2438treebuilder_add_subelement(PyObject *element, PyObject *child)
2439{
2440 _Py_IDENTIFIER(append);
2441 if (Element_CheckExact(element)) {
2442 ElementObject *elem = (ElementObject *) element;
2443 return element_add_subelement(elem, child);
2444 }
2445 else {
2446 PyObject *res;
2447 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2448 if (res == NULL)
2449 return -1;
2450 Py_DECREF(res);
2451 return 0;
2452 }
2453}
2454
2455/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002456/* handlers */
2457
2458LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002459treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2460 PyObject* attrib)
2461{
2462 PyObject* node;
2463 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002464 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002465
2466 if (self->data) {
2467 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002468 if (treebuilder_set_element_text(self->last, self->data))
2469 return NULL;
2470 }
2471 else {
2472 if (treebuilder_set_element_tail(self->last, self->data))
2473 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002474 }
2475 self->data = NULL;
2476 }
2477
Eli Bendersky08231a92013-05-18 15:47:16 -07002478 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002479 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2480 } else {
2481 node = create_new_element(tag, attrib);
2482 }
2483 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002484 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002485 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002486
Antoine Pitrouee329312012-10-04 19:53:29 +02002487 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002488
2489 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002490 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002491 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002492 } else {
2493 if (self->root) {
2494 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002495 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002496 "multiple elements on top level"
2497 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002498 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499 }
2500 Py_INCREF(node);
2501 self->root = node;
2502 }
2503
2504 if (self->index < PyList_GET_SIZE(self->stack)) {
2505 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002506 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507 Py_INCREF(this);
2508 } else {
2509 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002510 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511 }
2512 self->index++;
2513
2514 Py_DECREF(this);
2515 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002516 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002517
2518 Py_DECREF(self->last);
2519 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002520 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002521
2522 if (self->start_event_obj) {
2523 PyObject* res;
2524 PyObject* action = self->start_event_obj;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002525 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002526 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002527 PyList_Append(self->events, res);
2528 Py_DECREF(res);
2529 } else
2530 PyErr_Clear(); /* FIXME: propagate error */
2531 }
2532
2533 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002534
2535 error:
2536 Py_DECREF(node);
2537 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002538}
2539
2540LOCAL(PyObject*)
2541treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2542{
2543 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002544 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002545 /* ignore calls to data before the first call to start */
2546 Py_RETURN_NONE;
2547 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002548 /* store the first item as is */
2549 Py_INCREF(data); self->data = data;
2550 } else {
2551 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002552 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2553 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002554 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002555 /* expat often generates single character data sections; handle
2556 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002557 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2558 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002559 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002560 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002561 } else if (PyList_CheckExact(self->data)) {
2562 if (PyList_Append(self->data, data) < 0)
2563 return NULL;
2564 } else {
2565 PyObject* list = PyList_New(2);
2566 if (!list)
2567 return NULL;
2568 PyList_SET_ITEM(list, 0, self->data);
2569 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2570 self->data = list;
2571 }
2572 }
2573
2574 Py_RETURN_NONE;
2575}
2576
2577LOCAL(PyObject*)
2578treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2579{
2580 PyObject* item;
2581
2582 if (self->data) {
2583 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002584 if (treebuilder_set_element_text(self->last, self->data))
2585 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002586 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002587 if (treebuilder_set_element_tail(self->last, self->data))
2588 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002589 }
2590 self->data = NULL;
2591 }
2592
2593 if (self->index == 0) {
2594 PyErr_SetString(
2595 PyExc_IndexError,
2596 "pop from empty stack"
2597 );
2598 return NULL;
2599 }
2600
2601 self->index--;
2602
2603 item = PyList_GET_ITEM(self->stack, self->index);
2604 Py_INCREF(item);
2605
2606 Py_DECREF(self->last);
2607
Antoine Pitrouee329312012-10-04 19:53:29 +02002608 self->last = self->this;
2609 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002610
2611 if (self->end_event_obj) {
2612 PyObject* res;
2613 PyObject* action = self->end_event_obj;
2614 PyObject* node = (PyObject*) self->last;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002615 res = PyTuple_Pack(2, action, node);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002616 if (res) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002617 PyList_Append(self->events, res);
2618 Py_DECREF(res);
2619 } else
2620 PyErr_Clear(); /* FIXME: propagate error */
2621 }
2622
2623 Py_INCREF(self->last);
2624 return (PyObject*) self->last;
2625}
2626
2627LOCAL(void)
2628treebuilder_handle_namespace(TreeBuilderObject* self, int start,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002629 PyObject *prefix, PyObject *uri)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002630{
2631 PyObject* res;
2632 PyObject* action;
2633 PyObject* parcel;
2634
2635 if (!self->events)
2636 return;
2637
2638 if (start) {
2639 if (!self->start_ns_event_obj)
2640 return;
2641 action = self->start_ns_event_obj;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002642 parcel = Py_BuildValue("OO", prefix, uri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002643 if (!parcel)
2644 return;
2645 Py_INCREF(action);
2646 } else {
2647 if (!self->end_ns_event_obj)
2648 return;
2649 action = self->end_ns_event_obj;
2650 Py_INCREF(action);
2651 parcel = Py_None;
2652 Py_INCREF(parcel);
2653 }
2654
2655 res = PyTuple_New(2);
2656
2657 if (res) {
2658 PyTuple_SET_ITEM(res, 0, action);
2659 PyTuple_SET_ITEM(res, 1, parcel);
2660 PyList_Append(self->events, res);
2661 Py_DECREF(res);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002662 }
2663 else {
2664 Py_DECREF(action);
2665 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002666 PyErr_Clear(); /* FIXME: propagate error */
Antoine Pitrouc1948842012-10-01 23:40:37 +02002667 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002668}
2669
2670/* -------------------------------------------------------------------- */
2671/* methods (in alphabetical order) */
2672
Serhiy Storchakacb985562015-05-04 15:32:48 +03002673/*[clinic input]
2674_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002675
Serhiy Storchakacb985562015-05-04 15:32:48 +03002676 data: object
2677 /
2678
2679[clinic start generated code]*/
2680
2681static PyObject *
2682_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2683/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2684{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002685 return treebuilder_handle_data(self, data);
2686}
2687
Serhiy Storchakacb985562015-05-04 15:32:48 +03002688/*[clinic input]
2689_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002690
Serhiy Storchakacb985562015-05-04 15:32:48 +03002691 tag: object
2692 /
2693
2694[clinic start generated code]*/
2695
2696static PyObject *
2697_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2698/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2699{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002700 return treebuilder_handle_end(self, tag);
2701}
2702
2703LOCAL(PyObject*)
2704treebuilder_done(TreeBuilderObject* self)
2705{
2706 PyObject* res;
2707
2708 /* FIXME: check stack size? */
2709
2710 if (self->root)
2711 res = self->root;
2712 else
2713 res = Py_None;
2714
2715 Py_INCREF(res);
2716 return res;
2717}
2718
Serhiy Storchakacb985562015-05-04 15:32:48 +03002719/*[clinic input]
2720_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721
Serhiy Storchakacb985562015-05-04 15:32:48 +03002722[clinic start generated code]*/
2723
2724static PyObject *
2725_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2726/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2727{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002728 return treebuilder_done(self);
2729}
2730
Serhiy Storchakacb985562015-05-04 15:32:48 +03002731/*[clinic input]
2732_elementtree.TreeBuilder.start
2733
2734 tag: object
2735 attrs: object = None
2736 /
2737
2738[clinic start generated code]*/
2739
2740static PyObject *
2741_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2742 PyObject *attrs)
2743/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002744{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002745 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746}
2747
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002748/* ==================================================================== */
2749/* the expat interface */
2750
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002751#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002752#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002753
2754/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2755 * cached globally without being in per-module state.
2756 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002757static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002758#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002759
Eli Bendersky52467b12012-06-01 07:13:08 +03002760static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2761 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2762
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763typedef struct {
2764 PyObject_HEAD
2765
2766 XML_Parser parser;
2767
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002768 PyObject *target;
2769 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002770
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002771 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002772
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002773 PyObject *handle_start;
2774 PyObject *handle_data;
2775 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002776
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002777 PyObject *handle_comment;
2778 PyObject *handle_pi;
2779 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002781 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002782
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002783} XMLParserObject;
2784
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002785static PyObject*
2786_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2787static PyObject *
2788_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2789 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002790
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002791/* helpers */
2792
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002793LOCAL(PyObject*)
2794makeuniversal(XMLParserObject* self, const char* string)
2795{
2796 /* convert a UTF-8 tag/attribute name from the expat parser
2797 to a universal name string */
2798
Antoine Pitrouc1948842012-10-01 23:40:37 +02002799 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002800 PyObject* key;
2801 PyObject* value;
2802
2803 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002804 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002805 if (!key)
2806 return NULL;
2807
2808 value = PyDict_GetItem(self->names, key);
2809
2810 if (value) {
2811 Py_INCREF(value);
2812 } else {
2813 /* new name. convert to universal name, and decode as
2814 necessary */
2815
2816 PyObject* tag;
2817 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002818 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002819
2820 /* look for namespace separator */
2821 for (i = 0; i < size; i++)
2822 if (string[i] == '}')
2823 break;
2824 if (i != size) {
2825 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002826 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002827 if (tag == NULL) {
2828 Py_DECREF(key);
2829 return NULL;
2830 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002831 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002832 p[0] = '{';
2833 memcpy(p+1, string, size);
2834 size++;
2835 } else {
2836 /* plain name; use key as tag */
2837 Py_INCREF(key);
2838 tag = key;
2839 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002840
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002841 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002842 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002843 value = PyUnicode_DecodeUTF8(p, size, "strict");
2844 Py_DECREF(tag);
2845 if (!value) {
2846 Py_DECREF(key);
2847 return NULL;
2848 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002849
2850 /* add to names dictionary */
2851 if (PyDict_SetItem(self->names, key, value) < 0) {
2852 Py_DECREF(key);
2853 Py_DECREF(value);
2854 return NULL;
2855 }
2856 }
2857
2858 Py_DECREF(key);
2859 return value;
2860}
2861
Eli Bendersky5b77d812012-03-16 08:20:05 +02002862/* Set the ParseError exception with the given parameters.
2863 * If message is not NULL, it's used as the error string. Otherwise, the
2864 * message string is the default for the given error_code.
2865*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002866static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002867expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2868 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002869{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002870 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002871 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002872
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002873 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002874 message ? message : EXPAT(ErrorString)(error_code),
2875 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002876 if (errmsg == NULL)
2877 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002878
Eli Bendersky532d03e2013-08-10 08:00:39 -07002879 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002880 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002881 if (!error)
2882 return;
2883
Eli Bendersky5b77d812012-03-16 08:20:05 +02002884 /* Add code and position attributes */
2885 code = PyLong_FromLong((long)error_code);
2886 if (!code) {
2887 Py_DECREF(error);
2888 return;
2889 }
2890 if (PyObject_SetAttrString(error, "code", code) == -1) {
2891 Py_DECREF(error);
2892 Py_DECREF(code);
2893 return;
2894 }
2895 Py_DECREF(code);
2896
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002897 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002898 if (!position) {
2899 Py_DECREF(error);
2900 return;
2901 }
2902 if (PyObject_SetAttrString(error, "position", position) == -1) {
2903 Py_DECREF(error);
2904 Py_DECREF(position);
2905 return;
2906 }
2907 Py_DECREF(position);
2908
Eli Bendersky532d03e2013-08-10 08:00:39 -07002909 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002910 Py_DECREF(error);
2911}
2912
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002913/* -------------------------------------------------------------------- */
2914/* handlers */
2915
2916static void
2917expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2918 int data_len)
2919{
2920 PyObject* key;
2921 PyObject* value;
2922 PyObject* res;
2923
2924 if (data_len < 2 || data_in[0] != '&')
2925 return;
2926
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002927 if (PyErr_Occurred())
2928 return;
2929
Neal Norwitz0269b912007-08-08 06:56:02 +00002930 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 if (!key)
2932 return;
2933
2934 value = PyDict_GetItem(self->entity, key);
2935
2936 if (value) {
2937 if (TreeBuilder_CheckExact(self->target))
2938 res = treebuilder_handle_data(
2939 (TreeBuilderObject*) self->target, value
2940 );
2941 else if (self->handle_data)
2942 res = PyObject_CallFunction(self->handle_data, "O", value);
2943 else
2944 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002945 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002946 } else if (!PyErr_Occurred()) {
2947 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002948 char message[128] = "undefined entity ";
2949 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002950 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002951 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002953 EXPAT(GetErrorColumnNumber)(self->parser),
2954 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002955 );
2956 }
2957
2958 Py_DECREF(key);
2959}
2960
2961static void
2962expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2963 const XML_Char **attrib_in)
2964{
2965 PyObject* res;
2966 PyObject* tag;
2967 PyObject* attrib;
2968 int ok;
2969
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002970 if (PyErr_Occurred())
2971 return;
2972
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002973 /* tag name */
2974 tag = makeuniversal(self, tag_in);
2975 if (!tag)
2976 return; /* parser will look for errors */
2977
2978 /* attributes */
2979 if (attrib_in[0]) {
2980 attrib = PyDict_New();
2981 if (!attrib)
2982 return;
2983 while (attrib_in[0] && attrib_in[1]) {
2984 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002985 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002986 if (!key || !value) {
2987 Py_XDECREF(value);
2988 Py_XDECREF(key);
2989 Py_DECREF(attrib);
2990 return;
2991 }
2992 ok = PyDict_SetItem(attrib, key, value);
2993 Py_DECREF(value);
2994 Py_DECREF(key);
2995 if (ok < 0) {
2996 Py_DECREF(attrib);
2997 return;
2998 }
2999 attrib_in += 2;
3000 }
3001 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003002 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03003003 attrib = PyDict_New();
3004 if (!attrib)
3005 return;
3006 }
3007
3008 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003009 /* shortcut */
3010 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3011 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03003012 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003013 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003014 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003015 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016 res = NULL;
3017
3018 Py_DECREF(tag);
3019 Py_DECREF(attrib);
3020
3021 Py_XDECREF(res);
3022}
3023
3024static void
3025expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3026 int data_len)
3027{
3028 PyObject* data;
3029 PyObject* res;
3030
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003031 if (PyErr_Occurred())
3032 return;
3033
Neal Norwitz0269b912007-08-08 06:56:02 +00003034 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003035 if (!data)
3036 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003037
3038 if (TreeBuilder_CheckExact(self->target))
3039 /* shortcut */
3040 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3041 else if (self->handle_data)
3042 res = PyObject_CallFunction(self->handle_data, "O", data);
3043 else
3044 res = NULL;
3045
3046 Py_DECREF(data);
3047
3048 Py_XDECREF(res);
3049}
3050
3051static void
3052expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3053{
3054 PyObject* tag;
3055 PyObject* res = NULL;
3056
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003057 if (PyErr_Occurred())
3058 return;
3059
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003060 if (TreeBuilder_CheckExact(self->target))
3061 /* shortcut */
3062 /* the standard tree builder doesn't look at the end tag */
3063 res = treebuilder_handle_end(
3064 (TreeBuilderObject*) self->target, Py_None
3065 );
3066 else if (self->handle_end) {
3067 tag = makeuniversal(self, tag_in);
3068 if (tag) {
3069 res = PyObject_CallFunction(self->handle_end, "O", tag);
3070 Py_DECREF(tag);
3071 }
3072 }
3073
3074 Py_XDECREF(res);
3075}
3076
3077static void
3078expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3079 const XML_Char *uri)
3080{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003081 PyObject* sprefix = NULL;
3082 PyObject* suri = NULL;
3083
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003084 if (PyErr_Occurred())
3085 return;
3086
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003087 if (uri)
Eli Bendersky4b795182013-11-28 06:33:21 -08003088 suri = PyUnicode_DecodeUTF8(uri, strlen(uri), "strict");
Eli Bendersky5dd40e52013-11-28 06:31:58 -08003089 else
Eli Bendersky4b795182013-11-28 06:33:21 -08003090 suri = PyUnicode_FromString("");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003091 if (!suri)
3092 return;
3093
3094 if (prefix)
3095 sprefix = PyUnicode_DecodeUTF8(prefix, strlen(prefix), "strict");
3096 else
3097 sprefix = PyUnicode_FromString("");
3098 if (!sprefix) {
3099 Py_DECREF(suri);
3100 return;
3101 }
3102
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003103 treebuilder_handle_namespace(
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003104 (TreeBuilderObject*) self->target, 1, sprefix, suri
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003105 );
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003106
3107 Py_DECREF(sprefix);
3108 Py_DECREF(suri);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003109}
3110
3111static void
3112expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3113{
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003114 if (PyErr_Occurred())
3115 return;
3116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003117 treebuilder_handle_namespace(
3118 (TreeBuilderObject*) self->target, 0, NULL, NULL
3119 );
3120}
3121
3122static void
3123expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3124{
3125 PyObject* comment;
3126 PyObject* res;
3127
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003128 if (PyErr_Occurred())
3129 return;
3130
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003131 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003132 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003133 if (comment) {
3134 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3135 Py_XDECREF(res);
3136 Py_DECREF(comment);
3137 }
3138 }
3139}
3140
Eli Bendersky45839902013-01-13 05:14:47 -08003141static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003142expat_start_doctype_handler(XMLParserObject *self,
3143 const XML_Char *doctype_name,
3144 const XML_Char *sysid,
3145 const XML_Char *pubid,
3146 int has_internal_subset)
3147{
3148 PyObject *self_pyobj = (PyObject *)self;
3149 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3150 PyObject *parser_doctype = NULL;
3151 PyObject *res = NULL;
3152
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003153 if (PyErr_Occurred())
3154 return;
3155
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003156 doctype_name_obj = makeuniversal(self, doctype_name);
3157 if (!doctype_name_obj)
3158 return;
3159
3160 if (sysid) {
3161 sysid_obj = makeuniversal(self, sysid);
3162 if (!sysid_obj) {
3163 Py_DECREF(doctype_name_obj);
3164 return;
3165 }
3166 } else {
3167 Py_INCREF(Py_None);
3168 sysid_obj = Py_None;
3169 }
3170
3171 if (pubid) {
3172 pubid_obj = makeuniversal(self, pubid);
3173 if (!pubid_obj) {
3174 Py_DECREF(doctype_name_obj);
3175 Py_DECREF(sysid_obj);
3176 return;
3177 }
3178 } else {
3179 Py_INCREF(Py_None);
3180 pubid_obj = Py_None;
3181 }
3182
3183 /* If the target has a handler for doctype, call it. */
3184 if (self->handle_doctype) {
3185 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3186 doctype_name_obj, pubid_obj, sysid_obj);
3187 Py_CLEAR(res);
3188 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003189 else {
3190 /* Now see if the parser itself has a doctype method. If yes and it's
3191 * a custom method, call it but warn about deprecation. If it's only
3192 * the vanilla XMLParser method, do nothing.
3193 */
3194 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3195 if (parser_doctype &&
3196 !(PyCFunction_Check(parser_doctype) &&
3197 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3198 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003199 (PyCFunction) _elementtree_XMLParser_doctype)) {
3200 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3201 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003202 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003203 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003204 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003205 res = PyObject_CallFunction(parser_doctype, "OOO",
3206 doctype_name_obj, pubid_obj, sysid_obj);
3207 Py_CLEAR(res);
3208 }
3209 }
3210
3211clear:
3212 Py_XDECREF(parser_doctype);
3213 Py_DECREF(doctype_name_obj);
3214 Py_DECREF(pubid_obj);
3215 Py_DECREF(sysid_obj);
3216}
3217
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003218static void
3219expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3220 const XML_Char* data_in)
3221{
3222 PyObject* target;
3223 PyObject* data;
3224 PyObject* res;
3225
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003226 if (PyErr_Occurred())
3227 return;
3228
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003229 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003230 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3231 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003232 if (target && data) {
3233 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3234 Py_XDECREF(res);
3235 Py_DECREF(data);
3236 Py_DECREF(target);
3237 } else {
3238 Py_XDECREF(data);
3239 Py_XDECREF(target);
3240 }
3241 }
3242}
3243
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003245
Eli Bendersky52467b12012-06-01 07:13:08 +03003246static PyObject *
3247xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003248{
Eli Bendersky52467b12012-06-01 07:13:08 +03003249 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3250 if (self) {
3251 self->parser = NULL;
3252 self->target = self->entity = self->names = NULL;
3253 self->handle_start = self->handle_data = self->handle_end = NULL;
3254 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003255 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003256 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003257 return (PyObject *)self;
3258}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259
Serhiy Storchakacb985562015-05-04 15:32:48 +03003260/*[clinic input]
3261_elementtree.XMLParser.__init__
3262
3263 html: object = NULL
3264 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003265 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003266
3267[clinic start generated code]*/
3268
Eli Bendersky52467b12012-06-01 07:13:08 +03003269static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3271 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003272/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003273{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003274 self->entity = PyDict_New();
3275 if (!self->entity)
3276 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003277
Serhiy Storchakacb985562015-05-04 15:32:48 +03003278 self->names = PyDict_New();
3279 if (!self->names) {
3280 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003281 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003283
Serhiy Storchakacb985562015-05-04 15:32:48 +03003284 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3285 if (!self->parser) {
3286 Py_CLEAR(self->entity);
3287 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 }
3291
Eli Bendersky52467b12012-06-01 07:13:08 +03003292 if (target) {
3293 Py_INCREF(target);
3294 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003295 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003296 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 Py_CLEAR(self->entity);
3298 Py_CLEAR(self->names);
3299 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003300 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003301 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003302 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003303 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304
Serhiy Storchakacb985562015-05-04 15:32:48 +03003305 self->handle_start = PyObject_GetAttrString(target, "start");
3306 self->handle_data = PyObject_GetAttrString(target, "data");
3307 self->handle_end = PyObject_GetAttrString(target, "end");
3308 self->handle_comment = PyObject_GetAttrString(target, "comment");
3309 self->handle_pi = PyObject_GetAttrString(target, "pi");
3310 self->handle_close = PyObject_GetAttrString(target, "close");
3311 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003312
3313 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003314
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003316 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003318 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319 (XML_StartElementHandler) expat_start_handler,
3320 (XML_EndElementHandler) expat_end_handler
3321 );
3322 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003323 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003324 (XML_DefaultHandler) expat_default_handler
3325 );
3326 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003327 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003328 (XML_CharacterDataHandler) expat_data_handler
3329 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003330 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003331 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003332 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003333 (XML_CommentHandler) expat_comment_handler
3334 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003335 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003336 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003337 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338 (XML_ProcessingInstructionHandler) expat_pi_handler
3339 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003340 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003341 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003342 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3343 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003344 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003345 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003346 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003347 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348
Eli Bendersky52467b12012-06-01 07:13:08 +03003349 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003350}
3351
Eli Bendersky52467b12012-06-01 07:13:08 +03003352static int
3353xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3354{
3355 Py_VISIT(self->handle_close);
3356 Py_VISIT(self->handle_pi);
3357 Py_VISIT(self->handle_comment);
3358 Py_VISIT(self->handle_end);
3359 Py_VISIT(self->handle_data);
3360 Py_VISIT(self->handle_start);
3361
3362 Py_VISIT(self->target);
3363 Py_VISIT(self->entity);
3364 Py_VISIT(self->names);
3365
3366 return 0;
3367}
3368
3369static int
3370xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003371{
3372 EXPAT(ParserFree)(self->parser);
3373
Antoine Pitrouc1948842012-10-01 23:40:37 +02003374 Py_CLEAR(self->handle_close);
3375 Py_CLEAR(self->handle_pi);
3376 Py_CLEAR(self->handle_comment);
3377 Py_CLEAR(self->handle_end);
3378 Py_CLEAR(self->handle_data);
3379 Py_CLEAR(self->handle_start);
3380 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003381
Antoine Pitrouc1948842012-10-01 23:40:37 +02003382 Py_CLEAR(self->target);
3383 Py_CLEAR(self->entity);
3384 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385
Eli Bendersky52467b12012-06-01 07:13:08 +03003386 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003387}
3388
Eli Bendersky52467b12012-06-01 07:13:08 +03003389static void
3390xmlparser_dealloc(XMLParserObject* self)
3391{
3392 PyObject_GC_UnTrack(self);
3393 xmlparser_gc_clear(self);
3394 Py_TYPE(self)->tp_free((PyObject *)self);
3395}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003396
3397LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003398expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003399{
3400 int ok;
3401
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003402 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3404
3405 if (PyErr_Occurred())
3406 return NULL;
3407
3408 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003409 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003410 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003411 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003412 EXPAT(GetErrorColumnNumber)(self->parser),
3413 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003414 );
3415 return NULL;
3416 }
3417
3418 Py_RETURN_NONE;
3419}
3420
Serhiy Storchakacb985562015-05-04 15:32:48 +03003421/*[clinic input]
3422_elementtree.XMLParser.close
3423
3424[clinic start generated code]*/
3425
3426static PyObject *
3427_elementtree_XMLParser_close_impl(XMLParserObject *self)
3428/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003429{
3430 /* end feeding data to parser */
3431
3432 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003433 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003434 if (!res)
3435 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003436
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003437 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003438 Py_DECREF(res);
3439 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003440 }
3441 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003442 Py_DECREF(res);
3443 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003444 }
3445 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003446 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003447 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003448}
3449
Serhiy Storchakacb985562015-05-04 15:32:48 +03003450/*[clinic input]
3451_elementtree.XMLParser.feed
3452
3453 data: object
3454 /
3455
3456[clinic start generated code]*/
3457
3458static PyObject *
3459_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3460/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003461{
3462 /* feed data to parser */
3463
Serhiy Storchakacb985562015-05-04 15:32:48 +03003464 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003465 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003466 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3467 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003468 return NULL;
3469 if (data_len > INT_MAX) {
3470 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3471 return NULL;
3472 }
3473 /* Explicitly set UTF-8 encoding. Return code ignored. */
3474 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003475 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003476 }
3477 else {
3478 Py_buffer view;
3479 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003480 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003481 return NULL;
3482 if (view.len > INT_MAX) {
3483 PyBuffer_Release(&view);
3484 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3485 return NULL;
3486 }
3487 res = expat_parse(self, view.buf, (int)view.len, 0);
3488 PyBuffer_Release(&view);
3489 return res;
3490 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003491}
3492
Serhiy Storchakacb985562015-05-04 15:32:48 +03003493/*[clinic input]
3494_elementtree.XMLParser._parse_whole
3495
3496 file: object
3497 /
3498
3499[clinic start generated code]*/
3500
3501static PyObject *
3502_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3503/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003504{
Eli Benderskya3699232013-05-19 18:47:23 -07003505 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003506 PyObject* reader;
3507 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003508 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003509 PyObject* res;
3510
Serhiy Storchakacb985562015-05-04 15:32:48 +03003511 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003512 if (!reader)
3513 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003514
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003515 /* read from open file object */
3516 for (;;) {
3517
3518 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3519
3520 if (!buffer) {
3521 /* read failed (e.g. due to KeyboardInterrupt) */
3522 Py_DECREF(reader);
3523 return NULL;
3524 }
3525
Eli Benderskyf996e772012-03-16 05:53:30 +02003526 if (PyUnicode_CheckExact(buffer)) {
3527 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003528 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003529 Py_DECREF(buffer);
3530 break;
3531 }
3532 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003533 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003534 if (!temp) {
3535 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003536 Py_DECREF(reader);
3537 return NULL;
3538 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003539 buffer = temp;
3540 }
3541 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003542 Py_DECREF(buffer);
3543 break;
3544 }
3545
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003546 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3547 Py_DECREF(buffer);
3548 Py_DECREF(reader);
3549 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3550 return NULL;
3551 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003552 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003553 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003554 );
3555
3556 Py_DECREF(buffer);
3557
3558 if (!res) {
3559 Py_DECREF(reader);
3560 return NULL;
3561 }
3562 Py_DECREF(res);
3563
3564 }
3565
3566 Py_DECREF(reader);
3567
3568 res = expat_parse(self, "", 0, 1);
3569
3570 if (res && TreeBuilder_CheckExact(self->target)) {
3571 Py_DECREF(res);
3572 return treebuilder_done((TreeBuilderObject*) self->target);
3573 }
3574
3575 return res;
3576}
3577
Serhiy Storchakacb985562015-05-04 15:32:48 +03003578/*[clinic input]
3579_elementtree.XMLParser.doctype
3580
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003581 name: object
3582 pubid: object
3583 system: object
3584 /
3585
Serhiy Storchakacb985562015-05-04 15:32:48 +03003586[clinic start generated code]*/
3587
3588static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003589_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3590 PyObject *pubid, PyObject *system)
3591/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003592{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003593 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3594 "This method of XMLParser is deprecated. Define"
3595 " doctype() method on the TreeBuilder target.",
3596 1) < 0) {
3597 return NULL;
3598 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003599 Py_RETURN_NONE;
3600}
3601
Serhiy Storchakacb985562015-05-04 15:32:48 +03003602/*[clinic input]
3603_elementtree.XMLParser._setevents
3604
3605 events_queue: object(subclass_of='&PyList_Type')
3606 events_to_report: object = None
3607 /
3608
3609[clinic start generated code]*/
3610
3611static PyObject *
3612_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3613 PyObject *events_queue,
3614 PyObject *events_to_report)
3615/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003616{
3617 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003618 Py_ssize_t i, seqlen;
3619 TreeBuilderObject *target;
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003620 PyObject *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003621
3622 if (!TreeBuilder_CheckExact(self->target)) {
3623 PyErr_SetString(
3624 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003625 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003626 "targets"
3627 );
3628 return NULL;
3629 }
3630
3631 target = (TreeBuilderObject*) self->target;
3632
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003633 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003634 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003635 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003636
3637 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003638 Py_CLEAR(target->start_event_obj);
3639 Py_CLEAR(target->end_event_obj);
3640 Py_CLEAR(target->start_ns_event_obj);
3641 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003642
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003643 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003645 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646 Py_RETURN_NONE;
3647 }
3648
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003649 if (!(events_seq = PySequence_Fast(events_to_report,
3650 "events must be a sequence"))) {
3651 return NULL;
3652 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003653
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003654 seqlen = PySequence_Size(events_seq);
3655 for (i = 0; i < seqlen; ++i) {
3656 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3657 char *event_name = NULL;
3658 if (PyUnicode_Check(event_name_obj)) {
3659 event_name = _PyUnicode_AsString(event_name_obj);
3660 } else if (PyBytes_Check(event_name_obj)) {
3661 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003662 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663
3664 if (event_name == NULL) {
3665 Py_DECREF(events_seq);
3666 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3667 return NULL;
3668 } else if (strcmp(event_name, "start") == 0) {
3669 Py_INCREF(event_name_obj);
3670 target->start_event_obj = event_name_obj;
3671 } else if (strcmp(event_name, "end") == 0) {
3672 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003673 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003674 target->end_event_obj = event_name_obj;
3675 } else if (strcmp(event_name, "start-ns") == 0) {
3676 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003677 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003678 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003679 EXPAT(SetNamespaceDeclHandler)(
3680 self->parser,
3681 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3682 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3683 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003684 } else if (strcmp(event_name, "end-ns") == 0) {
3685 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003686 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003687 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003688 EXPAT(SetNamespaceDeclHandler)(
3689 self->parser,
3690 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3691 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3692 );
3693 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003694 Py_DECREF(events_seq);
3695 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003696 return NULL;
3697 }
3698 }
3699
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003700 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003701 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003702}
3703
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003704static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003705xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003706{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003707 if (PyUnicode_Check(nameobj)) {
3708 PyObject* res;
3709 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3710 res = self->entity;
3711 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3712 res = self->target;
3713 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3714 return PyUnicode_FromFormat(
3715 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003716 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003717 }
3718 else
3719 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003720
Alexander Belopolskye239d232010-12-08 23:31:48 +00003721 Py_INCREF(res);
3722 return res;
3723 }
3724 generic:
3725 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003726}
3727
Serhiy Storchakacb985562015-05-04 15:32:48 +03003728#include "clinic/_elementtree.c.h"
3729
3730static PyMethodDef element_methods[] = {
3731
3732 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3733
3734 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3735 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3736
3737 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3738 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3739 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3740
3741 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3742 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3743 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3744 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3745
3746 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3747 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3748 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3749
3750 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3751 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3752
3753 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3754 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3755
3756 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3757
3758 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3759 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3760 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3761 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3762 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3763
3764 {NULL, NULL}
3765};
3766
3767static PyMappingMethods element_as_mapping = {
3768 (lenfunc) element_length,
3769 (binaryfunc) element_subscr,
3770 (objobjargproc) element_ass_subscr,
3771};
3772
3773static PyTypeObject Element_Type = {
3774 PyVarObject_HEAD_INIT(NULL, 0)
3775 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3776 /* methods */
3777 (destructor)element_dealloc, /* tp_dealloc */
3778 0, /* tp_print */
3779 0, /* tp_getattr */
3780 0, /* tp_setattr */
3781 0, /* tp_reserved */
3782 (reprfunc)element_repr, /* tp_repr */
3783 0, /* tp_as_number */
3784 &element_as_sequence, /* tp_as_sequence */
3785 &element_as_mapping, /* tp_as_mapping */
3786 0, /* tp_hash */
3787 0, /* tp_call */
3788 0, /* tp_str */
3789 (getattrofunc)element_getattro, /* tp_getattro */
3790 (setattrofunc)element_setattro, /* tp_setattro */
3791 0, /* tp_as_buffer */
3792 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3793 /* tp_flags */
3794 0, /* tp_doc */
3795 (traverseproc)element_gc_traverse, /* tp_traverse */
3796 (inquiry)element_gc_clear, /* tp_clear */
3797 0, /* tp_richcompare */
3798 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3799 0, /* tp_iter */
3800 0, /* tp_iternext */
3801 element_methods, /* tp_methods */
3802 0, /* tp_members */
3803 0, /* tp_getset */
3804 0, /* tp_base */
3805 0, /* tp_dict */
3806 0, /* tp_descr_get */
3807 0, /* tp_descr_set */
3808 0, /* tp_dictoffset */
3809 (initproc)element_init, /* tp_init */
3810 PyType_GenericAlloc, /* tp_alloc */
3811 element_new, /* tp_new */
3812 0, /* tp_free */
3813};
3814
3815static PyMethodDef treebuilder_methods[] = {
3816 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3817 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3818 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3819 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3820 {NULL, NULL}
3821};
3822
3823static PyTypeObject TreeBuilder_Type = {
3824 PyVarObject_HEAD_INIT(NULL, 0)
3825 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3826 /* methods */
3827 (destructor)treebuilder_dealloc, /* tp_dealloc */
3828 0, /* tp_print */
3829 0, /* tp_getattr */
3830 0, /* tp_setattr */
3831 0, /* tp_reserved */
3832 0, /* tp_repr */
3833 0, /* tp_as_number */
3834 0, /* tp_as_sequence */
3835 0, /* tp_as_mapping */
3836 0, /* tp_hash */
3837 0, /* tp_call */
3838 0, /* tp_str */
3839 0, /* tp_getattro */
3840 0, /* tp_setattro */
3841 0, /* tp_as_buffer */
3842 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3843 /* tp_flags */
3844 0, /* tp_doc */
3845 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3846 (inquiry)treebuilder_gc_clear, /* tp_clear */
3847 0, /* tp_richcompare */
3848 0, /* tp_weaklistoffset */
3849 0, /* tp_iter */
3850 0, /* tp_iternext */
3851 treebuilder_methods, /* tp_methods */
3852 0, /* tp_members */
3853 0, /* tp_getset */
3854 0, /* tp_base */
3855 0, /* tp_dict */
3856 0, /* tp_descr_get */
3857 0, /* tp_descr_set */
3858 0, /* tp_dictoffset */
3859 _elementtree_TreeBuilder___init__, /* tp_init */
3860 PyType_GenericAlloc, /* tp_alloc */
3861 treebuilder_new, /* tp_new */
3862 0, /* tp_free */
3863};
3864
3865static PyMethodDef xmlparser_methods[] = {
3866 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3867 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3868 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3869 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3870 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3871 {NULL, NULL}
3872};
3873
Neal Norwitz227b5332006-03-22 09:28:35 +00003874static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003875 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003876 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003877 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003878 (destructor)xmlparser_dealloc, /* tp_dealloc */
3879 0, /* tp_print */
3880 0, /* tp_getattr */
3881 0, /* tp_setattr */
3882 0, /* tp_reserved */
3883 0, /* tp_repr */
3884 0, /* tp_as_number */
3885 0, /* tp_as_sequence */
3886 0, /* tp_as_mapping */
3887 0, /* tp_hash */
3888 0, /* tp_call */
3889 0, /* tp_str */
3890 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3891 0, /* tp_setattro */
3892 0, /* tp_as_buffer */
3893 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3894 /* tp_flags */
3895 0, /* tp_doc */
3896 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3897 (inquiry)xmlparser_gc_clear, /* tp_clear */
3898 0, /* tp_richcompare */
3899 0, /* tp_weaklistoffset */
3900 0, /* tp_iter */
3901 0, /* tp_iternext */
3902 xmlparser_methods, /* tp_methods */
3903 0, /* tp_members */
3904 0, /* tp_getset */
3905 0, /* tp_base */
3906 0, /* tp_dict */
3907 0, /* tp_descr_get */
3908 0, /* tp_descr_set */
3909 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003910 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003911 PyType_GenericAlloc, /* tp_alloc */
3912 xmlparser_new, /* tp_new */
3913 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003914};
3915
/* ==================================================================== */
/* Python module interface */
3918
3919static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003920 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003921 {NULL, NULL}
3922};
3923
Martin v. Löwis1a214512008-06-11 05:26:20 +00003924
Eli Bendersky532d03e2013-08-10 08:00:39 -07003925static struct PyModuleDef elementtreemodule = {
3926 PyModuleDef_HEAD_INIT,
3927 "_elementtree",
3928 NULL,
3929 sizeof(elementtreestate),
3930 _functions,
3931 NULL,
3932 elementtree_traverse,
3933 elementtree_clear,
3934 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003935};
3936
Neal Norwitzf6657e62006-12-28 04:47:50 +00003937PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003938PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003939{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003940 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003941 elementtreestate *st;
3942
3943 m = PyState_FindModule(&elementtreemodule);
3944 if (m) {
3945 Py_INCREF(m);
3946 return m;
3947 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003948
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003949 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003950 if (PyType_Ready(&ElementIter_Type) < 0)
3951 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003952 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003953 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003954 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003955 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003956 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003957 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003958
Eli Bendersky532d03e2013-08-10 08:00:39 -07003959 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003960 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003961 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003962 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003963
Eli Bendersky828efde2012-04-05 05:40:58 +03003964 if (!(temp = PyImport_ImportModule("copy")))
3965 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003966 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003967 Py_XDECREF(temp);
3968
Eli Bendersky532d03e2013-08-10 08:00:39 -07003969 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003970 return NULL;
3971
Eli Bendersky20d41742012-06-01 09:48:37 +03003972 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003973 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3974 if (expat_capi) {
3975 /* check that it's usable */
3976 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003977 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003978 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3979 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003980 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003981 PyErr_SetString(PyExc_ImportError,
3982 "pyexpat version is incompatible");
3983 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003984 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003985 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003986 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003987 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003988
Eli Bendersky532d03e2013-08-10 08:00:39 -07003989 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003990 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003991 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003992 Py_INCREF(st->parseerror_obj);
3993 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003994
Eli Bendersky092af1f2012-03-04 07:14:03 +02003995 Py_INCREF((PyObject *)&Element_Type);
3996 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3997
Eli Bendersky58d548d2012-05-29 15:45:16 +03003998 Py_INCREF((PyObject *)&TreeBuilder_Type);
3999 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
4000
Eli Bendersky52467b12012-06-01 07:13:08 +03004001 Py_INCREF((PyObject *)&XMLParser_Type);
4002 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03004003
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004004 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004005}