blob: c483d878b7268df388463c4b012712913a26b901 [file] [log] [blame]
Eli Benderskybf05df22013-04-20 05:44:01 -07001/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See http://www.python.org/psf/license for licensing details.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00004 *
Eli Benderskybf05df22013-04-20 05:44:01 -07005 * _elementtree - C accelerator for xml.etree.ElementTree
Florent Xiclunaf15351d2010-03-13 23:24:31 +00006 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00008 *
9 * info@pythonware.com
10 * http://www.pythonware.com
Eli Benderskybf05df22013-04-20 05:44:01 -070011 *--------------------------------------------------------------------
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000012 */
13
Serhiy Storchaka26861b02015-02-16 20:52:17 +020014#define PY_SSIZE_T_CLEAN
15
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030017#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000018
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000019/* -------------------------------------------------------------------- */
20/* configuration */
21
/* Number of child slots embedded directly in the per-element extra
   structure.  An element with no more children than this does not need
   a separately allocated children buffer. */
#define STATIC_CHILDREN 4
25
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
30
31/* Also note that pymalloc always allocates blocks in multiples of
Florent Xiclunaa72a98f2012-02-13 11:03:30 +010032 eight bytes. For the current C version of ElementTree, this means
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000033 that the number of children should be an even number, at least on
34 32-bit platforms. */
35
36/* -------------------------------------------------------------------- */
37
/* Allocation tracing hooks.  Change the condition below to 1 to keep a
   running byte counter that is printed on every ALLOC/RELEASE; in the
   default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment)\
do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment)\
do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
48
/* LOCAL(type) introduces a file-private declaration of the given type.
   Under MSVC it additionally asks for inlining and the __fastcall
   calling convention. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
55
/* The low bit of an element's text/tail pointer doubles as a 'join' flag.
   JOIN_OBJ masks the flag off and yields the real object pointer, JOIN_GET
   reads the flag, and JOIN_SET yields the pointer with the given flag OR'ed
   into it.  Every use of text or tail as an object pointer must go through
   JOIN_OBJ; see the comments in the ElementObject definition for more
   info. */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000063
Eli Benderskydd3661e2013-09-13 06:24:25 -070064/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
65 * reference since this function sets it to NULL.
66*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020067static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070068{
69 if (*p) {
70 PyObject *tmp = JOIN_OBJ(*p);
71 *p = NULL;
72 Py_DECREF(tmp);
73 }
74}
75
Ronald Oussoren138d0802013-07-19 11:11:25 +020076/* Types defined by this extension */
77static PyTypeObject Element_Type;
78static PyTypeObject ElementIter_Type;
79static PyTypeObject TreeBuilder_Type;
80static PyTypeObject XMLParser_Type;
81
82
Eli Bendersky532d03e2013-08-10 08:00:39 -070083/* Per-module state; PEP 3121 */
84typedef struct {
85 PyObject *parseerror_obj;
86 PyObject *deepcopy_obj;
87 PyObject *elementpath_obj;
88} elementtreestate;
89
90static struct PyModuleDef elementtreemodule;
91
92/* Given a module object (assumed to be _elementtree), get its per-module
93 * state.
94 */
95#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
96
97/* Find the module instance imported in the currently running sub-interpreter
98 * and get its state.
99 */
100#define ET_STATE_GLOBAL \
101 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
102
103static int
104elementtree_clear(PyObject *m)
105{
106 elementtreestate *st = ET_STATE(m);
107 Py_CLEAR(st->parseerror_obj);
108 Py_CLEAR(st->deepcopy_obj);
109 Py_CLEAR(st->elementpath_obj);
110 return 0;
111}
112
113static int
114elementtree_traverse(PyObject *m, visitproc visit, void *arg)
115{
116 elementtreestate *st = ET_STATE(m);
117 Py_VISIT(st->parseerror_obj);
118 Py_VISIT(st->deepcopy_obj);
119 Py_VISIT(st->elementpath_obj);
120 return 0;
121}
122
123static void
124elementtree_free(void *m)
125{
126 elementtree_clear((PyObject *)m);
127}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000128
129/* helpers */
130
131LOCAL(PyObject*)
132deepcopy(PyObject* object, PyObject* memo)
133{
134 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000135 PyObject* args;
136 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138
Eli Bendersky532d03e2013-08-10 08:00:39 -0700139 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000140 PyErr_SetString(
141 PyExc_RuntimeError,
142 "deepcopy helper not found"
143 );
144 return NULL;
145 }
146
Antoine Pitrouc1948842012-10-01 23:40:37 +0200147 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000148 if (!args)
149 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700150 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000151 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000152 return result;
153}
154
155LOCAL(PyObject*)
156list_join(PyObject* list)
157{
158 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000159 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000160 PyObject* result;
161
Antoine Pitrouc1948842012-10-01 23:40:37 +0200162 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000163 if (!joiner)
164 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000166 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200167 if (result)
168 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000169 return result;
170}
171
Eli Bendersky48d358b2012-05-30 17:57:50 +0300172/* Is the given object an empty dictionary?
173*/
174static int
175is_empty_dict(PyObject *obj)
176{
177 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
178}
179
180
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200182/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000183
184typedef struct {
185
186 /* attributes (a dictionary object), or None if no attributes */
187 PyObject* attrib;
188
189 /* child elements */
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200190 Py_ssize_t length; /* actual number of items */
191 Py_ssize_t allocated; /* allocated items */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000192
193 /* this either points to _children or to a malloced buffer */
194 PyObject* *children;
195
196 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100197
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000198} ElementObjectExtra;
199
200typedef struct {
201 PyObject_HEAD
202
203 /* element tag (a string). */
204 PyObject* tag;
205
206 /* text before first child. note that this is a tagged pointer;
207 use JOIN_OBJ to get the object pointer. the join flag is used
208 to distinguish lists created by the tree builder from lists
209 assigned to the attribute by application code; the former
210 should be joined before being returned to the user, the latter
211 should be left intact. */
212 PyObject* text;
213
214 /* text after this element, in parent. note that this is a tagged
215 pointer; use JOIN_OBJ to get the object pointer. */
216 PyObject* tail;
217
218 ElementObjectExtra* extra;
219
Eli Benderskyebf37a22012-04-03 22:02:37 +0300220 PyObject *weakreflist; /* For tp_weaklistoffset */
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222} ElementObject;
223
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
Christian Heimes90aa7642007-12-19 02:45:37 +0000225#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000226
227/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200228/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000229
230LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200231create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000232{
233 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200234 if (!self->extra) {
235 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200237 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000238
239 if (!attrib)
240 attrib = Py_None;
241
242 Py_INCREF(attrib);
243 self->extra->attrib = attrib;
244
245 self->extra->length = 0;
246 self->extra->allocated = STATIC_CHILDREN;
247 self->extra->children = self->extra->_children;
248
249 return 0;
250}
251
252LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200253dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000254{
Eli Bendersky08b85292012-04-04 15:55:07 +0300255 ElementObjectExtra *myextra;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200256 Py_ssize_t i;
Eli Bendersky08b85292012-04-04 15:55:07 +0300257
Eli Benderskyebf37a22012-04-03 22:02:37 +0300258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300263 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300264 self->extra = NULL;
265
266 Py_DECREF(myextra->attrib);
267
Eli Benderskyebf37a22012-04-03 22:02:37 +0300268 for (i = 0; i < myextra->length; i++)
269 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000270
Eli Benderskyebf37a22012-04-03 22:02:37 +0300271 if (myextra->children != myextra->_children)
272 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273
Eli Benderskyebf37a22012-04-03 22:02:37 +0300274 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000275}
276
Eli Bendersky092af1f2012-03-04 07:14:03 +0200277/* Convenience internal function to create new Element objects with the given
278 * tag and attributes.
279*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200281create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000282{
283 ElementObject* self;
284
Eli Bendersky0192ba32012-03-30 16:38:33 +0300285 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 if (self == NULL)
287 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 self->extra = NULL;
289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000290 Py_INCREF(tag);
291 self->tag = tag;
292
293 Py_INCREF(Py_None);
294 self->text = Py_None;
295
296 Py_INCREF(Py_None);
297 self->tail = Py_None;
298
Eli Benderskyebf37a22012-04-03 22:02:37 +0300299 self->weakreflist = NULL;
300
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200301 ALLOC(sizeof(ElementObject), "create element");
302 PyObject_GC_Track(self);
303
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200304 if (attrib != Py_None && !is_empty_dict(attrib)) {
305 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200306 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200307 return NULL;
308 }
309 }
310
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000311 return (PyObject*) self;
312}
313
Eli Bendersky092af1f2012-03-04 07:14:03 +0200314static PyObject *
315element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
316{
317 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
318 if (e != NULL) {
319 Py_INCREF(Py_None);
320 e->tag = Py_None;
321
322 Py_INCREF(Py_None);
323 e->text = Py_None;
324
325 Py_INCREF(Py_None);
326 e->tail = Py_None;
327
328 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300329 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200330 }
331 return (PyObject *)e;
332}
333
Eli Bendersky737b1732012-05-29 06:02:56 +0300334/* Helper function for extracting the attrib dictionary from a keywords dict.
335 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800336 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300337 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700338 *
339 * Return a dictionary with the content of kwds merged into the content of
340 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300341 */
342static PyObject*
343get_attrib_from_keywords(PyObject *kwds)
344{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700345 PyObject *attrib_str = PyUnicode_FromString("attrib");
346 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300347
348 if (attrib) {
349 /* If attrib was found in kwds, copy its value and remove it from
350 * kwds
351 */
352 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700353 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300354 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
355 Py_TYPE(attrib)->tp_name);
356 return NULL;
357 }
358 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700359 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300360 } else {
361 attrib = PyDict_New();
362 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700363
364 Py_DECREF(attrib_str);
365
366 /* attrib can be NULL if PyDict_New failed */
367 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200368 if (PyDict_Update(attrib, kwds) < 0)
369 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300370 return attrib;
371}
372
Serhiy Storchakacb985562015-05-04 15:32:48 +0300373/*[clinic input]
374module _elementtree
375class _elementtree.Element "ElementObject *" "&Element_Type"
376class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
377class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
378[clinic start generated code]*/
379/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
380
Eli Bendersky092af1f2012-03-04 07:14:03 +0200381static int
382element_init(PyObject *self, PyObject *args, PyObject *kwds)
383{
384 PyObject *tag;
385 PyObject *tmp;
386 PyObject *attrib = NULL;
387 ElementObject *self_elem;
388
389 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
390 return -1;
391
Eli Bendersky737b1732012-05-29 06:02:56 +0300392 if (attrib) {
393 /* attrib passed as positional arg */
394 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200395 if (!attrib)
396 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300397 if (kwds) {
398 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200399 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300400 return -1;
401 }
402 }
403 } else if (kwds) {
404 /* have keywords args */
405 attrib = get_attrib_from_keywords(kwds);
406 if (!attrib)
407 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200408 }
409
410 self_elem = (ElementObject *)self;
411
Antoine Pitrouc1948842012-10-01 23:40:37 +0200412 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200413 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200414 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200415 return -1;
416 }
417 }
418
Eli Bendersky48d358b2012-05-30 17:57:50 +0300419 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421
422 /* Replace the objects already pointed to by tag, text and tail. */
423 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(tmp);
427
428 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200429 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200430 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200431 Py_DECREF(JOIN_OBJ(tmp));
432
433 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200434 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200435 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200436 Py_DECREF(JOIN_OBJ(tmp));
437
438 return 0;
439}
440
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000441LOCAL(int)
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200442element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000443{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200444 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000445 PyObject* *children;
446
447 /* make sure self->children can hold the given number of extra
448 elements. set an exception and return -1 if allocation failed */
449
Victor Stinner5f0af232013-07-11 23:01:36 +0200450 if (!self->extra) {
451 if (create_extra(self, NULL) < 0)
452 return -1;
453 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000454
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200455 size = self->extra->length + extra; /* never overflows */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000456
457 if (size > self->extra->allocated) {
458 /* use Python 2.4's list growth strategy */
459 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000460 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100461 * which needs at least 4 bytes.
462 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000463 * be safe.
464 */
465 size = size ? size : 1;
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200466 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
467 goto nomemory;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000469 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100470 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000471 * false alarm always assume at least one child to be safe.
472 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000473 children = PyObject_Realloc(self->extra->children,
474 size * sizeof(PyObject*));
475 if (!children)
476 goto nomemory;
477 } else {
478 children = PyObject_Malloc(size * sizeof(PyObject*));
479 if (!children)
480 goto nomemory;
481 /* copy existing children from static area to malloc buffer */
482 memcpy(children, self->extra->children,
483 self->extra->length * sizeof(PyObject*));
484 }
485 self->extra->children = children;
486 self->extra->allocated = size;
487 }
488
489 return 0;
490
491 nomemory:
492 PyErr_NoMemory();
493 return -1;
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499 /* add a child element to a parent */
500
501 if (element_resize(self, 1) < 0)
502 return -1;
503
504 Py_INCREF(element);
505 self->extra->children[self->extra->length] = element;
506
507 self->extra->length++;
508
509 return 0;
510}
511
512LOCAL(PyObject*)
513element_get_attrib(ElementObject* self)
514{
515 /* return borrowed reference to attrib dictionary */
516 /* note: this function assumes that the extra section exists */
517
518 PyObject* res = self->extra->attrib;
519
520 if (res == Py_None) {
521 /* create missing dictionary */
522 res = PyDict_New();
523 if (!res)
524 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200525 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000526 self->extra->attrib = res;
527 }
528
529 return res;
530}
531
532LOCAL(PyObject*)
533element_get_text(ElementObject* self)
534{
535 /* return borrowed reference to text attribute */
536
537 PyObject* res = self->text;
538
539 if (JOIN_GET(res)) {
540 res = JOIN_OBJ(res);
541 if (PyList_CheckExact(res)) {
542 res = list_join(res);
543 if (!res)
544 return NULL;
545 self->text = res;
546 }
547 }
548
549 return res;
550}
551
552LOCAL(PyObject*)
553element_get_tail(ElementObject* self)
554{
555 /* return borrowed reference to text attribute */
556
557 PyObject* res = self->tail;
558
559 if (JOIN_GET(res)) {
560 res = JOIN_OBJ(res);
561 if (PyList_CheckExact(res)) {
562 res = list_join(res);
563 if (!res)
564 return NULL;
565 self->tail = res;
566 }
567 }
568
569 return res;
570}
571
572static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300573subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000574{
575 PyObject* elem;
576
577 ElementObject* parent;
578 PyObject* tag;
579 PyObject* attrib = NULL;
580 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
581 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800582 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000583 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800584 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000585
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (attrib) {
587 /* attrib passed as positional arg */
588 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000589 if (!attrib)
590 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300591 if (kwds) {
592 if (PyDict_Update(attrib, kwds) < 0) {
593 return NULL;
594 }
595 }
596 } else if (kwds) {
597 /* have keyword args */
598 attrib = get_attrib_from_keywords(kwds);
599 if (!attrib)
600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000601 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300602 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_INCREF(Py_None);
604 attrib = Py_None;
605 }
606
Eli Bendersky092af1f2012-03-04 07:14:03 +0200607 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000608 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200609 if (elem == NULL)
610 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000612 if (element_add_subelement(parent, elem) < 0) {
613 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000614 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000615 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000616
617 return elem;
618}
619
Eli Bendersky0192ba32012-03-30 16:38:33 +0300620static int
621element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
622{
623 Py_VISIT(self->tag);
624 Py_VISIT(JOIN_OBJ(self->text));
625 Py_VISIT(JOIN_OBJ(self->tail));
626
627 if (self->extra) {
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200628 Py_ssize_t i;
Eli Bendersky0192ba32012-03-30 16:38:33 +0300629 Py_VISIT(self->extra->attrib);
630
631 for (i = 0; i < self->extra->length; ++i)
632 Py_VISIT(self->extra->children[i]);
633 }
634 return 0;
635}
636
637static int
638element_gc_clear(ElementObject *self)
639{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300640 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700641 _clear_joined_ptr(&self->text);
642 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643
644 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300645 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300646 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300647 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300648 return 0;
649}
650
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000651static void
652element_dealloc(ElementObject* self)
653{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300655
656 if (self->weakreflist != NULL)
657 PyObject_ClearWeakRefs((PyObject *) self);
658
Eli Bendersky0192ba32012-03-30 16:38:33 +0300659 /* element_gc_clear clears all references and deallocates extra
660 */
661 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000662
663 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200664 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000665}
666
667/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000668
Serhiy Storchakacb985562015-05-04 15:32:48 +0300669/*[clinic input]
670_elementtree.Element.append
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000671
Serhiy Storchakacb985562015-05-04 15:32:48 +0300672 subelement: object(subclass_of='&Element_Type')
673 /
674
675[clinic start generated code]*/
676
677static PyObject *
678_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
679/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
680{
681 if (element_add_subelement(self, subelement) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000682 return NULL;
683
684 Py_RETURN_NONE;
685}
686
Serhiy Storchakacb985562015-05-04 15:32:48 +0300687/*[clinic input]
688_elementtree.Element.clear
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000689
Serhiy Storchakacb985562015-05-04 15:32:48 +0300690[clinic start generated code]*/
691
692static PyObject *
693_elementtree_Element_clear_impl(ElementObject *self)
694/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
695{
Eli Benderskyebf37a22012-04-03 22:02:37 +0300696 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000697
698 Py_INCREF(Py_None);
699 Py_DECREF(JOIN_OBJ(self->text));
700 self->text = Py_None;
701
702 Py_INCREF(Py_None);
703 Py_DECREF(JOIN_OBJ(self->tail));
704 self->tail = Py_None;
705
706 Py_RETURN_NONE;
707}
708
Serhiy Storchakacb985562015-05-04 15:32:48 +0300709/*[clinic input]
710_elementtree.Element.__copy__
711
712[clinic start generated code]*/
713
714static PyObject *
715_elementtree_Element___copy___impl(ElementObject *self)
716/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000717{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200718 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000719 ElementObject* element;
720
Eli Bendersky092af1f2012-03-04 07:14:03 +0200721 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800722 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723 if (!element)
724 return NULL;
725
726 Py_DECREF(JOIN_OBJ(element->text));
727 element->text = self->text;
728 Py_INCREF(JOIN_OBJ(element->text));
729
730 Py_DECREF(JOIN_OBJ(element->tail));
731 element->tail = self->tail;
732 Py_INCREF(JOIN_OBJ(element->tail));
733
734 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000735 if (element_resize(element, self->extra->length) < 0) {
736 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000737 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000738 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000739
740 for (i = 0; i < self->extra->length; i++) {
741 Py_INCREF(self->extra->children[i]);
742 element->extra->children[i] = self->extra->children[i];
743 }
744
745 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000746 }
747
748 return (PyObject*) element;
749}
750
Serhiy Storchakacb985562015-05-04 15:32:48 +0300751/*[clinic input]
752_elementtree.Element.__deepcopy__
753
754 memo: object
755 /
756
757[clinic start generated code]*/
758
759static PyObject *
760_elementtree_Element___deepcopy__(ElementObject *self, PyObject *memo)
761/*[clinic end generated code: output=d1f19851d17bf239 input=df24c2b602430b77]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000762{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200763 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000764 ElementObject* element;
765 PyObject* tag;
766 PyObject* attrib;
767 PyObject* text;
768 PyObject* tail;
769 PyObject* id;
770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000771 tag = deepcopy(self->tag, memo);
772 if (!tag)
773 return NULL;
774
775 if (self->extra) {
776 attrib = deepcopy(self->extra->attrib, memo);
777 if (!attrib) {
778 Py_DECREF(tag);
779 return NULL;
780 }
781 } else {
782 Py_INCREF(Py_None);
783 attrib = Py_None;
784 }
785
Eli Bendersky092af1f2012-03-04 07:14:03 +0200786 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000787
788 Py_DECREF(tag);
789 Py_DECREF(attrib);
790
791 if (!element)
792 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100793
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000794 text = deepcopy(JOIN_OBJ(self->text), memo);
795 if (!text)
796 goto error;
797 Py_DECREF(element->text);
798 element->text = JOIN_SET(text, JOIN_GET(self->text));
799
800 tail = deepcopy(JOIN_OBJ(self->tail), memo);
801 if (!tail)
802 goto error;
803 Py_DECREF(element->tail);
804 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
805
806 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000807 if (element_resize(element, self->extra->length) < 0)
808 goto error;
809
810 for (i = 0; i < self->extra->length; i++) {
811 PyObject* child = deepcopy(self->extra->children[i], memo);
812 if (!child) {
813 element->extra->length = i;
814 goto error;
815 }
816 element->extra->children[i] = child;
817 }
818
819 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000820 }
821
822 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200823 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000824 if (!id)
825 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000826
827 i = PyDict_SetItem(memo, id, (PyObject*) element);
828
829 Py_DECREF(id);
830
831 if (i < 0)
832 goto error;
833
834 return (PyObject*) element;
835
836 error:
837 Py_DECREF(element);
838 return NULL;
839}
840
Serhiy Storchakacb985562015-05-04 15:32:48 +0300841/*[clinic input]
842_elementtree.Element.__sizeof__ -> Py_ssize_t
843
844[clinic start generated code]*/
845
846static Py_ssize_t
847_elementtree_Element___sizeof___impl(ElementObject *self)
848/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
Martin v. Löwisbce16662012-06-17 10:41:22 +0200849{
Martin v. Löwisbce16662012-06-17 10:41:22 +0200850 Py_ssize_t result = sizeof(ElementObject);
851 if (self->extra) {
852 result += sizeof(ElementObjectExtra);
853 if (self->extra->children != self->extra->_children)
854 result += sizeof(PyObject*) * self->extra->allocated;
855 }
Serhiy Storchakacb985562015-05-04 15:32:48 +0300856 return result;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200857}
858
/* Keys of the state dictionary used by __getstate__ and __setstate__. */
#define PICKLED_TAG      "tag"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TEXT     "text"
#define PICKLED_TAIL     "tail"
#define PICKLED_CHILDREN "_children"
865
866/* __getstate__ returns a fabricated instance dict as in the pure-Python
867 * Element implementation, for interoperability/interchangeability. This
868 * makes the pure-Python implementation details an API, but (a) there aren't
869 * any unnecessary structures there; and (b) it buys compatibility with 3.2
870 * pickles. See issue #16076.
871 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300872/*[clinic input]
873_elementtree.Element.__getstate__
874
875[clinic start generated code]*/
876
Eli Bendersky698bdb22013-01-10 06:01:06 -0800877static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +0300878_elementtree_Element___getstate___impl(ElementObject *self)
879/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880{
Serhiy Storchaka26861b02015-02-16 20:52:17 +0200881 Py_ssize_t i, noattrib;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 PyObject *instancedict = NULL, *children;
883
884 /* Build a list of children. */
885 children = PyList_New(self->extra ? self->extra->length : 0);
886 if (!children)
887 return NULL;
888 for (i = 0; i < PyList_GET_SIZE(children); i++) {
889 PyObject *child = self->extra->children[i];
890 Py_INCREF(child);
891 PyList_SET_ITEM(children, i, child);
892 }
893
894 /* Construct the state object. */
895 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
896 if (noattrib)
897 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
898 PICKLED_TAG, self->tag,
899 PICKLED_CHILDREN, children,
900 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700901 PICKLED_TEXT, JOIN_OBJ(self->text),
902 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800903 else
904 instancedict = Py_BuildValue("{sOsOsOsOsO}",
905 PICKLED_TAG, self->tag,
906 PICKLED_CHILDREN, children,
907 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700908 PICKLED_TEXT, JOIN_OBJ(self->text),
909 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800910 if (instancedict) {
911 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800912 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800913 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800914 else {
915 for (i = 0; i < PyList_GET_SIZE(children); i++)
916 Py_DECREF(PyList_GET_ITEM(children, i));
917 Py_DECREF(children);
918
919 return NULL;
920 }
921}
922
923static PyObject *
924element_setstate_from_attributes(ElementObject *self,
925 PyObject *tag,
926 PyObject *attrib,
927 PyObject *text,
928 PyObject *tail,
929 PyObject *children)
930{
931 Py_ssize_t i, nchildren;
932
933 if (!tag) {
934 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
935 return NULL;
936 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800937
938 Py_CLEAR(self->tag);
939 self->tag = tag;
940 Py_INCREF(self->tag);
941
Eli Benderskydd3661e2013-09-13 06:24:25 -0700942 _clear_joined_ptr(&self->text);
943 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
944 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800945
Eli Benderskydd3661e2013-09-13 06:24:25 -0700946 _clear_joined_ptr(&self->tail);
947 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
948 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800949
950 /* Handle ATTRIB and CHILDREN. */
951 if (!children && !attrib)
952 Py_RETURN_NONE;
953
954 /* Compute 'nchildren'. */
955 if (children) {
956 if (!PyList_Check(children)) {
957 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
958 return NULL;
959 }
960 nchildren = PyList_Size(children);
961 }
962 else {
963 nchildren = 0;
964 }
965
966 /* Allocate 'extra'. */
967 if (element_resize(self, nchildren)) {
968 return NULL;
969 }
970 assert(self->extra && self->extra->allocated >= nchildren);
971
972 /* Copy children */
973 for (i = 0; i < nchildren; i++) {
974 self->extra->children[i] = PyList_GET_ITEM(children, i);
975 Py_INCREF(self->extra->children[i]);
976 }
977
978 self->extra->length = nchildren;
979 self->extra->allocated = nchildren;
980
981 /* Stash attrib. */
982 if (attrib) {
983 Py_CLEAR(self->extra->attrib);
984 self->extra->attrib = attrib;
985 Py_INCREF(attrib);
986 }
987
988 Py_RETURN_NONE;
989}
990
991/* __setstate__ for Element instance from the Python implementation.
992 * 'state' should be the instance dict.
993 */
Serhiy Storchakacb985562015-05-04 15:32:48 +0300994
Eli Bendersky698bdb22013-01-10 06:01:06 -0800995static PyObject *
996element_setstate_from_Python(ElementObject *self, PyObject *state)
997{
998 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
999 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1000 PyObject *args;
1001 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001002 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001003
Eli Bendersky698bdb22013-01-10 06:01:06 -08001004 tag = attrib = text = tail = children = NULL;
1005 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001006 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001007 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001008
1009 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1010 &attrib, &text, &tail, &children))
1011 retval = element_setstate_from_attributes(self, tag, attrib, text,
1012 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -08001013 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -08001014 retval = NULL;
1015
1016 Py_DECREF(args);
1017 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -08001018}
1019
Serhiy Storchakacb985562015-05-04 15:32:48 +03001020/*[clinic input]
1021_elementtree.Element.__setstate__
1022
1023 state: object
1024 /
1025
1026[clinic start generated code]*/
1027
Eli Bendersky698bdb22013-01-10 06:01:06 -08001028static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001029_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1030/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
Eli Bendersky698bdb22013-01-10 06:01:06 -08001031{
1032 if (!PyDict_CheckExact(state)) {
1033 PyErr_Format(PyExc_TypeError,
1034 "Don't know how to unpickle \"%.200R\" as an Element",
1035 state);
1036 return NULL;
1037 }
1038 else
1039 return element_setstate_from_Python(self, state);
1040}
1041
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001042LOCAL(int)
1043checkpath(PyObject* tag)
1044{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001045 Py_ssize_t i;
1046 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001047
1048 /* check if a tag contains an xpath character */
1049
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050#define PATHCHAR(ch) \
1051 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001052
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001053 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1055 void *data = PyUnicode_DATA(tag);
1056 unsigned int kind = PyUnicode_KIND(tag);
1057 for (i = 0; i < len; i++) {
1058 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1059 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001060 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001061 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001062 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001064 return 1;
1065 }
1066 return 0;
1067 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001068 if (PyBytes_Check(tag)) {
1069 char *p = PyBytes_AS_STRING(tag);
1070 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001071 if (p[i] == '{')
1072 check = 0;
1073 else if (p[i] == '}')
1074 check = 1;
1075 else if (check && PATHCHAR(p[i]))
1076 return 1;
1077 }
1078 return 0;
1079 }
1080
1081 return 1; /* unknown type; might be path expression */
1082}
1083
Serhiy Storchakacb985562015-05-04 15:32:48 +03001084/*[clinic input]
1085_elementtree.Element.extend
1086
1087 elements: object
1088 /
1089
1090[clinic start generated code]*/
1091
1092static PyObject *
1093_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1094/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001095{
1096 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001097 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001098
Serhiy Storchakacb985562015-05-04 15:32:48 +03001099 seq = PySequence_Fast(elements, "");
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001100 if (!seq) {
1101 PyErr_Format(
1102 PyExc_TypeError,
Serhiy Storchakacb985562015-05-04 15:32:48 +03001103 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001104 );
1105 return NULL;
1106 }
1107
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001108 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001109 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001110 Py_INCREF(element);
1111 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001112 PyErr_Format(
1113 PyExc_TypeError,
1114 "expected an Element, not \"%.200s\"",
1115 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001116 Py_DECREF(seq);
1117 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001118 return NULL;
1119 }
1120
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001121 if (element_add_subelement(self, element) < 0) {
1122 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001123 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001124 return NULL;
1125 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001126 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001127 }
1128
1129 Py_DECREF(seq);
1130
1131 Py_RETURN_NONE;
1132}
1133
Serhiy Storchakacb985562015-05-04 15:32:48 +03001134/*[clinic input]
1135_elementtree.Element.find
1136
1137 path: object
1138 namespaces: object = None
1139
1140[clinic start generated code]*/
1141
1142static PyObject *
1143_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1144 PyObject *namespaces)
1145/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001146{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001147 Py_ssize_t i;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001148 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001149
Serhiy Storchakacb985562015-05-04 15:32:48 +03001150 if (checkpath(path) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001151 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001152 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001153 st->elementpath_obj, &PyId_find, "OOO", self, path, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001154 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001155 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001156
1157 if (!self->extra)
1158 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001159
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001160 for (i = 0; i < self->extra->length; i++) {
1161 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001162 int rc;
1163 if (!Element_CheckExact(item))
1164 continue;
1165 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001166 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001167 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001168 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001169 Py_DECREF(item);
1170 if (rc < 0)
1171 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001172 }
1173
1174 Py_RETURN_NONE;
1175}
1176
Serhiy Storchakacb985562015-05-04 15:32:48 +03001177/*[clinic input]
1178_elementtree.Element.findtext
1179
1180 path: object
1181 default: object = None
1182 namespaces: object = None
1183
1184[clinic start generated code]*/
1185
1186static PyObject *
1187_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1188 PyObject *default_value,
1189 PyObject *namespaces)
1190/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001191{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001192 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001193 _Py_IDENTIFIER(findtext);
Eli Bendersky532d03e2013-08-10 08:00:39 -07001194 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001195
Serhiy Storchakacb985562015-05-04 15:32:48 +03001196 if (checkpath(path) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001197 return _PyObject_CallMethodId(
Serhiy Storchakacb985562015-05-04 15:32:48 +03001198 st->elementpath_obj, &PyId_findtext, "OOOO", self, path, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001199 );
1200
1201 if (!self->extra) {
1202 Py_INCREF(default_value);
1203 return default_value;
1204 }
1205
1206 for (i = 0; i < self->extra->length; i++) {
1207 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001208 int rc;
1209 if (!Element_CheckExact(item))
1210 continue;
1211 Py_INCREF(item);
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001212 rc = PyObject_RichCompareBool(item->tag, path, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001213 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001214 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 if (text == Py_None) {
1216 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001217 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001218 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001219 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001220 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001221 return text;
1222 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001223 Py_DECREF(item);
1224 if (rc < 0)
1225 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 }
1227
1228 Py_INCREF(default_value);
1229 return default_value;
1230}
1231
Serhiy Storchakacb985562015-05-04 15:32:48 +03001232/*[clinic input]
1233_elementtree.Element.findall
1234
1235 path: object
1236 namespaces: object = None
1237
1238[clinic start generated code]*/
1239
1240static PyObject *
1241_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1242 PyObject *namespaces)
1243/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001244{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001245 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001246 PyObject* out;
Serhiy Storchakacb985562015-05-04 15:32:48 +03001247 PyObject* tag = path;
Eli Bendersky532d03e2013-08-10 08:00:39 -07001248 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001249
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001250 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001251 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001252 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001253 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001255 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001256
1257 out = PyList_New(0);
1258 if (!out)
1259 return NULL;
1260
1261 if (!self->extra)
1262 return out;
1263
1264 for (i = 0; i < self->extra->length; i++) {
1265 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001266 int rc;
1267 if (!Element_CheckExact(item))
1268 continue;
1269 Py_INCREF(item);
1270 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1271 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1272 Py_DECREF(item);
1273 Py_DECREF(out);
1274 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001275 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001276 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001277 }
1278
1279 return out;
1280}
1281
Serhiy Storchakacb985562015-05-04 15:32:48 +03001282/*[clinic input]
1283_elementtree.Element.iterfind
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001284
Serhiy Storchakacb985562015-05-04 15:32:48 +03001285 path: object
1286 namespaces: object = None
1287
1288[clinic start generated code]*/
1289
1290static PyObject *
1291_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1292 PyObject *namespaces)
1293/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1294{
1295 PyObject* tag = path;
1296 _Py_IDENTIFIER(iterfind);
1297 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001298
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001299 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001300 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001301}
1302
Serhiy Storchakacb985562015-05-04 15:32:48 +03001303/*[clinic input]
1304_elementtree.Element.get
1305
1306 key: object
1307 default: object = None
1308
1309[clinic start generated code]*/
1310
1311static PyObject *
1312_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1313 PyObject *default_value)
1314/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001315{
1316 PyObject* value;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001317
1318 if (!self->extra || self->extra->attrib == Py_None)
1319 value = default_value;
1320 else {
1321 value = PyDict_GetItem(self->extra->attrib, key);
1322 if (!value)
1323 value = default_value;
1324 }
1325
1326 Py_INCREF(value);
1327 return value;
1328}
1329
Serhiy Storchakacb985562015-05-04 15:32:48 +03001330/*[clinic input]
1331_elementtree.Element.getchildren
1332
1333[clinic start generated code]*/
1334
1335static PyObject *
1336_elementtree_Element_getchildren_impl(ElementObject *self)
1337/*[clinic end generated code: output=e50ffe118637b14f input=0f754dfded150d5f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001338{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001339 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001340 PyObject* list;
1341
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001342 /* FIXME: report as deprecated? */
1343
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001344 if (!self->extra)
1345 return PyList_New(0);
1346
1347 list = PyList_New(self->extra->length);
1348 if (!list)
1349 return NULL;
1350
1351 for (i = 0; i < self->extra->length; i++) {
1352 PyObject* item = self->extra->children[i];
1353 Py_INCREF(item);
1354 PyList_SET_ITEM(list, i, item);
1355 }
1356
1357 return list;
1358}
1359
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001360
Eli Bendersky64d11e62012-06-15 07:42:50 +03001361static PyObject *
1362create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1363
1364
Serhiy Storchakacb985562015-05-04 15:32:48 +03001365/*[clinic input]
1366_elementtree.Element.iter
1367
1368 tag: object = None
1369
1370[clinic start generated code]*/
1371
Eli Bendersky64d11e62012-06-15 07:42:50 +03001372static PyObject *
Serhiy Storchakacb985562015-05-04 15:32:48 +03001373_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1374/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
Eli Bendersky64d11e62012-06-15 07:42:50 +03001375{
Serhiy Storchakad6a69d82015-12-09 11:27:07 +02001376 if (PyUnicode_Check(tag)) {
1377 if (PyUnicode_READY(tag) < 0)
1378 return NULL;
1379 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1380 tag = Py_None;
1381 }
1382 else if (PyBytes_Check(tag)) {
1383 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1384 tag = Py_None;
1385 }
1386
Eli Bendersky64d11e62012-06-15 07:42:50 +03001387 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001388}
1389
1390
Serhiy Storchakacb985562015-05-04 15:32:48 +03001391/*[clinic input]
1392_elementtree.Element.itertext
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001393
Serhiy Storchakacb985562015-05-04 15:32:48 +03001394[clinic start generated code]*/
1395
1396static PyObject *
1397_elementtree_Element_itertext_impl(ElementObject *self)
1398/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1399{
Eli Bendersky64d11e62012-06-15 07:42:50 +03001400 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001401}
1402
Eli Bendersky64d11e62012-06-15 07:42:50 +03001403
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001404static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001405element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001406{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001407 ElementObject* self = (ElementObject*) self_;
1408
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409 if (!self->extra || index < 0 || index >= self->extra->length) {
1410 PyErr_SetString(
1411 PyExc_IndexError,
1412 "child index out of range"
1413 );
1414 return NULL;
1415 }
1416
1417 Py_INCREF(self->extra->children[index]);
1418 return self->extra->children[index];
1419}
1420
Serhiy Storchakacb985562015-05-04 15:32:48 +03001421/*[clinic input]
1422_elementtree.Element.insert
1423
1424 index: Py_ssize_t
1425 subelement: object(subclass_of='&Element_Type')
1426 /
1427
1428[clinic start generated code]*/
1429
1430static PyObject *
1431_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1432 PyObject *subelement)
1433/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001434{
Serhiy Storchakacb985562015-05-04 15:32:48 +03001435 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001436
Victor Stinner5f0af232013-07-11 23:01:36 +02001437 if (!self->extra) {
1438 if (create_extra(self, NULL) < 0)
1439 return NULL;
1440 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001441
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001442 if (index < 0) {
1443 index += self->extra->length;
1444 if (index < 0)
1445 index = 0;
1446 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001447 if (index > self->extra->length)
1448 index = self->extra->length;
1449
1450 if (element_resize(self, 1) < 0)
1451 return NULL;
1452
1453 for (i = self->extra->length; i > index; i--)
1454 self->extra->children[i] = self->extra->children[i-1];
1455
Serhiy Storchakacb985562015-05-04 15:32:48 +03001456 Py_INCREF(subelement);
1457 self->extra->children[index] = subelement;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001458
1459 self->extra->length++;
1460
1461 Py_RETURN_NONE;
1462}
1463
Serhiy Storchakacb985562015-05-04 15:32:48 +03001464/*[clinic input]
1465_elementtree.Element.items
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466
Serhiy Storchakacb985562015-05-04 15:32:48 +03001467[clinic start generated code]*/
1468
1469static PyObject *
1470_elementtree_Element_items_impl(ElementObject *self)
1471/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1472{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001473 if (!self->extra || self->extra->attrib == Py_None)
1474 return PyList_New(0);
1475
1476 return PyDict_Items(self->extra->attrib);
1477}
1478
Serhiy Storchakacb985562015-05-04 15:32:48 +03001479/*[clinic input]
1480_elementtree.Element.keys
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001481
Serhiy Storchakacb985562015-05-04 15:32:48 +03001482[clinic start generated code]*/
1483
1484static PyObject *
1485_elementtree_Element_keys_impl(ElementObject *self)
1486/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1487{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001488 if (!self->extra || self->extra->attrib == Py_None)
1489 return PyList_New(0);
1490
1491 return PyDict_Keys(self->extra->attrib);
1492}
1493
Martin v. Löwis18e16552006-02-15 17:27:45 +00001494static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001495element_length(ElementObject* self)
1496{
1497 if (!self->extra)
1498 return 0;
1499
1500 return self->extra->length;
1501}
1502
Serhiy Storchakacb985562015-05-04 15:32:48 +03001503/*[clinic input]
1504_elementtree.Element.makeelement
1505
1506 tag: object
1507 attrib: object
1508 /
1509
1510[clinic start generated code]*/
1511
1512static PyObject *
1513_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1514 PyObject *attrib)
1515/*[clinic end generated code: output=4109832d5bb789ef input=9480d1d2e3e68235]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001516{
1517 PyObject* elem;
1518
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001519 attrib = PyDict_Copy(attrib);
1520 if (!attrib)
1521 return NULL;
1522
Eli Bendersky092af1f2012-03-04 07:14:03 +02001523 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001524
1525 Py_DECREF(attrib);
1526
1527 return elem;
1528}
1529
Serhiy Storchakacb985562015-05-04 15:32:48 +03001530/*[clinic input]
1531_elementtree.Element.remove
1532
1533 subelement: object(subclass_of='&Element_Type')
1534 /
1535
1536[clinic start generated code]*/
1537
1538static PyObject *
1539_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1540/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001541{
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001542 Py_ssize_t i;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001543 int rc;
1544 PyObject *found;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001545
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001546 if (!self->extra) {
1547 /* element has no children, so raise exception */
1548 PyErr_SetString(
1549 PyExc_ValueError,
1550 "list.remove(x): x not in list"
1551 );
1552 return NULL;
1553 }
1554
1555 for (i = 0; i < self->extra->length; i++) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001556 if (self->extra->children[i] == subelement)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001557 break;
Serhiy Storchakaa2c145c2015-05-18 18:33:31 +03001558 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001559 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001560 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001561 if (rc < 0)
1562 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001563 }
1564
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001565 if (i >= self->extra->length) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03001566 /* subelement is not in children, so raise exception */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001567 PyErr_SetString(
1568 PyExc_ValueError,
1569 "list.remove(x): x not in list"
1570 );
1571 return NULL;
1572 }
1573
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001574 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001575
1576 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001577 for (; i < self->extra->length; i++)
1578 self->extra->children[i] = self->extra->children[i+1];
1579
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001580 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001581 Py_RETURN_NONE;
1582}
1583
1584static PyObject*
1585element_repr(ElementObject* self)
1586{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001587 if (self->tag)
1588 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1589 else
1590 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001591}
1592
Serhiy Storchakacb985562015-05-04 15:32:48 +03001593/*[clinic input]
1594_elementtree.Element.set
1595
1596 key: object
1597 value: object
1598 /
1599
1600[clinic start generated code]*/
1601
1602static PyObject *
1603_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1604 PyObject *value)
1605/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001606{
1607 PyObject* attrib;
1608
Victor Stinner5f0af232013-07-11 23:01:36 +02001609 if (!self->extra) {
1610 if (create_extra(self, NULL) < 0)
1611 return NULL;
1612 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001613
1614 attrib = element_get_attrib(self);
1615 if (!attrib)
1616 return NULL;
1617
1618 if (PyDict_SetItem(attrib, key, value) < 0)
1619 return NULL;
1620
1621 Py_RETURN_NONE;
1622}
1623
1624static int
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001625element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001626{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001627 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka26861b02015-02-16 20:52:17 +02001628 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001629 PyObject* old;
1630
1631 if (!self->extra || index < 0 || index >= self->extra->length) {
1632 PyErr_SetString(
1633 PyExc_IndexError,
1634 "child assignment index out of range");
1635 return -1;
1636 }
1637
1638 old = self->extra->children[index];
1639
1640 if (item) {
1641 Py_INCREF(item);
1642 self->extra->children[index] = item;
1643 } else {
1644 self->extra->length--;
1645 for (i = index; i < self->extra->length; i++)
1646 self->extra->children[i] = self->extra->children[i+1];
1647 }
1648
1649 Py_DECREF(old);
1650
1651 return 0;
1652}
1653
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001654static PyObject*
1655element_subscr(PyObject* self_, PyObject* item)
1656{
1657 ElementObject* self = (ElementObject*) self_;
1658
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001659 if (PyIndex_Check(item)) {
1660 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001661
1662 if (i == -1 && PyErr_Occurred()) {
1663 return NULL;
1664 }
1665 if (i < 0 && self->extra)
1666 i += self->extra->length;
1667 return element_getitem(self_, i);
1668 }
1669 else if (PySlice_Check(item)) {
1670 Py_ssize_t start, stop, step, slicelen, cur, i;
1671 PyObject* list;
1672
1673 if (!self->extra)
1674 return PyList_New(0);
1675
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001676 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001677 self->extra->length,
1678 &start, &stop, &step, &slicelen) < 0) {
1679 return NULL;
1680 }
1681
1682 if (slicelen <= 0)
1683 return PyList_New(0);
1684 else {
1685 list = PyList_New(slicelen);
1686 if (!list)
1687 return NULL;
1688
1689 for (cur = start, i = 0; i < slicelen;
1690 cur += step, i++) {
1691 PyObject* item = self->extra->children[cur];
1692 Py_INCREF(item);
1693 PyList_SET_ITEM(list, i, item);
1694 }
1695
1696 return list;
1697 }
1698 }
1699 else {
1700 PyErr_SetString(PyExc_TypeError,
1701 "element indices must be integers");
1702 return NULL;
1703 }
1704}
1705
1706static int
1707element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1708{
1709 ElementObject* self = (ElementObject*) self_;
1710
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001711 if (PyIndex_Check(item)) {
1712 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001713
1714 if (i == -1 && PyErr_Occurred()) {
1715 return -1;
1716 }
1717 if (i < 0 && self->extra)
1718 i += self->extra->length;
1719 return element_setitem(self_, i, value);
1720 }
1721 else if (PySlice_Check(item)) {
1722 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1723
1724 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001725 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726
Victor Stinner5f0af232013-07-11 23:01:36 +02001727 if (!self->extra) {
1728 if (create_extra(self, NULL) < 0)
1729 return -1;
1730 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001731
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001732 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001733 self->extra->length,
1734 &start, &stop, &step, &slicelen) < 0) {
1735 return -1;
1736 }
1737
Eli Bendersky865756a2012-03-09 13:38:15 +02001738 if (value == NULL) {
1739 /* Delete slice */
1740 size_t cur;
1741 Py_ssize_t i;
1742
1743 if (slicelen <= 0)
1744 return 0;
1745
1746 /* Since we're deleting, the direction of the range doesn't matter,
1747 * so for simplicity make it always ascending.
1748 */
1749 if (step < 0) {
1750 stop = start + 1;
1751 start = stop + step * (slicelen - 1) - 1;
1752 step = -step;
1753 }
1754
1755 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1756
1757 /* recycle is a list that will contain all the children
1758 * scheduled for removal.
1759 */
1760 if (!(recycle = PyList_New(slicelen))) {
1761 PyErr_NoMemory();
1762 return -1;
1763 }
1764
1765 /* This loop walks over all the children that have to be deleted,
1766 * with cur pointing at them. num_moved is the amount of children
1767 * until the next deleted child that have to be "shifted down" to
1768 * occupy the deleted's places.
1769 * Note that in the ith iteration, shifting is done i+i places down
1770 * because i children were already removed.
1771 */
1772 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1773 /* Compute how many children have to be moved, clipping at the
1774 * list end.
1775 */
1776 Py_ssize_t num_moved = step - 1;
1777 if (cur + step >= (size_t)self->extra->length) {
1778 num_moved = self->extra->length - cur - 1;
1779 }
1780
1781 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1782
1783 memmove(
1784 self->extra->children + cur - i,
1785 self->extra->children + cur + 1,
1786 num_moved * sizeof(PyObject *));
1787 }
1788
1789 /* Leftover "tail" after the last removed child */
1790 cur = start + (size_t)slicelen * step;
1791 if (cur < (size_t)self->extra->length) {
1792 memmove(
1793 self->extra->children + cur - slicelen,
1794 self->extra->children + cur,
1795 (self->extra->length - cur) * sizeof(PyObject *));
1796 }
1797
1798 self->extra->length -= slicelen;
1799
1800 /* Discard the recycle list with all the deleted sub-elements */
1801 Py_XDECREF(recycle);
1802 return 0;
1803 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001804
1805 /* A new slice is actually being assigned */
1806 seq = PySequence_Fast(value, "");
1807 if (!seq) {
1808 PyErr_Format(
1809 PyExc_TypeError,
1810 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1811 );
1812 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001813 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001814 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001815
1816 if (step != 1 && newlen != slicelen)
1817 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001818 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001819 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001820 "attempt to assign sequence of size %zd "
1821 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001822 newlen, slicelen
1823 );
1824 return -1;
1825 }
1826
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001827 /* Resize before creating the recycle bin, to prevent refleaks. */
1828 if (newlen > slicelen) {
1829 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001830 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001831 return -1;
1832 }
1833 }
1834
1835 if (slicelen > 0) {
1836 /* to avoid recursive calls to this method (via decref), move
1837 old items to the recycle bin here, and get rid of them when
1838 we're done modifying the element */
1839 recycle = PyList_New(slicelen);
1840 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001841 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001842 return -1;
1843 }
1844 for (cur = start, i = 0; i < slicelen;
1845 cur += step, i++)
1846 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1847 }
1848
1849 if (newlen < slicelen) {
1850 /* delete slice */
1851 for (i = stop; i < self->extra->length; i++)
1852 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1853 } else if (newlen > slicelen) {
1854 /* insert slice */
1855 for (i = self->extra->length-1; i >= stop; i--)
1856 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1857 }
1858
1859 /* replace the slice */
1860 for (cur = start, i = 0; i < newlen;
1861 cur += step, i++) {
1862 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1863 Py_INCREF(element);
1864 self->extra->children[cur] = element;
1865 }
1866
1867 self->extra->length += newlen - slicelen;
1868
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001869 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001870
1871 /* discard the recycle bin, and everything in it */
1872 Py_XDECREF(recycle);
1873
1874 return 0;
1875 }
1876 else {
1877 PyErr_SetString(PyExc_TypeError,
1878 "element indices must be integers");
1879 return -1;
1880 }
1881}
1882
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001883static PyObject*
Serhiy Storchakadde08152015-11-25 15:28:13 +02001884element_tag_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001886 PyObject *res = self->tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001887 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001888 return res;
1889}
1890
Serhiy Storchakadde08152015-11-25 15:28:13 +02001891static PyObject*
1892element_text_getter(ElementObject *self, void *closure)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001893{
Serhiy Storchakadde08152015-11-25 15:28:13 +02001894 PyObject *res = element_get_text(self);
1895 Py_XINCREF(res);
1896 return res;
1897}
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001898
Serhiy Storchakadde08152015-11-25 15:28:13 +02001899static PyObject*
1900element_tail_getter(ElementObject *self, void *closure)
1901{
1902 PyObject *res = element_get_tail(self);
1903 Py_XINCREF(res);
1904 return res;
1905}
1906
1907static PyObject*
1908element_attrib_getter(ElementObject *self, void *closure)
1909{
1910 PyObject *res;
1911 if (!self->extra) {
1912 if (create_extra(self, NULL) < 0)
1913 return NULL;
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001914 }
Serhiy Storchakadde08152015-11-25 15:28:13 +02001915 res = element_get_attrib(self);
1916 Py_XINCREF(res);
1917 return res;
1918}
Victor Stinner4d463432013-07-11 23:05:03 +02001919
/* macro for setter validation: a setter receives a NULL value when the
   attribute is being deleted; reject that with AttributeError and make the
   enclosing function return -1.  Wrapped in do { } while (0) so that the
   macro expands to exactly one statement and stays safe inside unbraced
   if/else bodies. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1928
Serhiy Storchakadde08152015-11-25 15:28:13 +02001929static int
1930element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1931{
1932 _VALIDATE_ATTR_VALUE(value);
1933 Py_INCREF(value);
1934 Py_DECREF(self->tag);
1935 self->tag = value;
1936 return 0;
1937}
1938
1939static int
1940element_text_setter(ElementObject *self, PyObject *value, void *closure)
1941{
1942 _VALIDATE_ATTR_VALUE(value);
1943 Py_INCREF(value);
1944 Py_DECREF(JOIN_OBJ(self->text));
1945 self->text = value;
1946 return 0;
1947}
1948
1949static int
1950element_tail_setter(ElementObject *self, PyObject *value, void *closure)
1951{
1952 _VALIDATE_ATTR_VALUE(value);
1953 Py_INCREF(value);
1954 Py_DECREF(JOIN_OBJ(self->tail));
1955 self->tail = value;
1956 return 0;
1957}
1958
1959static int
1960element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
1961{
1962 _VALIDATE_ATTR_VALUE(value);
1963 if (!self->extra) {
1964 if (create_extra(self, NULL) < 0)
1965 return -1;
1966 }
1967 Py_INCREF(value);
1968 Py_DECREF(self->extra->attrib);
1969 self->extra->attrib = value;
Eli Benderskyef9683b2013-05-18 07:52:34 -07001970 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001971}
1972
1973static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001975 0, /* sq_concat */
1976 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001977 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001978 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001979 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001980 0,
1981};
1982
Eli Bendersky64d11e62012-06-15 07:42:50 +03001983/******************************* Element iterator ****************************/
1984
1985/* ElementIterObject represents the iteration state over an XML element in
1986 * pre-order traversal. To keep track of which sub-element should be returned
1987 * next, a stack of parents is maintained. This is a standard stack-based
1988 * iterative pre-order traversal of a tree.
1989 * The stack is managed using a single-linked list starting at parent_stack.
1990 * Each stack node contains the saved parent to which we should return after
1991 * the current one is exhausted, and the next child to examine in that parent.
1992 */
typedef struct ParentLocator_t {
    /* Element whose children are being walked.  parent_stack_push_new()
       takes a reference to it and the node's owner releases it.  NULL only
       in the bottom sentinel node set up by create_elementiter(). */
    ElementObject *parent;
    /* Index of the next child of parent to visit. */
    Py_ssize_t child_index;
    /* Node below this one on the stack (NULL for the bottom node). */
    struct ParentLocator_t *next;
} ParentLocator;
1998
typedef struct {
    PyObject_HEAD
    /* Top of the linked stack of parents; the bottom node is a sentinel
       whose parent is NULL. */
    ParentLocator *parent_stack;
    /* Element the iteration starts from (owned reference). */
    ElementObject *root_element;
    /* Tag compared against element tags when filtering; Py_None disables
       the comparison (owned reference). */
    PyObject *sought_tag;
    /* Set to 1 once the root element has been pushed on the stack. */
    int root_done;
    /* Nonzero when the iterator yields text/tail objects (itertext())
       instead of elements (iter()). */
    int gettext;
} ElementIterObject;
2007
2008
2009static void
2010elementiter_dealloc(ElementIterObject *it)
2011{
2012 ParentLocator *p = it->parent_stack;
2013 while (p) {
2014 ParentLocator *temp = p;
2015 Py_XDECREF(p->parent);
2016 p = p->next;
2017 PyObject_Free(temp);
2018 }
2019
2020 Py_XDECREF(it->sought_tag);
2021 Py_XDECREF(it->root_element);
2022
2023 PyObject_GC_UnTrack(it);
2024 PyObject_GC_Del(it);
2025}
2026
2027static int
2028elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2029{
2030 ParentLocator *p = it->parent_stack;
2031 while (p) {
2032 Py_VISIT(p->parent);
2033 p = p->next;
2034 }
2035
2036 Py_VISIT(it->root_element);
2037 Py_VISIT(it->sought_tag);
2038 return 0;
2039}
2040
2041/* Helper function for elementiter_next. Add a new parent to the parent stack.
2042 */
2043static ParentLocator *
2044parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2045{
2046 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2047 if (new_node) {
2048 new_node->parent = parent;
2049 Py_INCREF(parent);
2050 new_node->child_index = 0;
2051 new_node->next = stack;
2052 }
2053 return new_node;
2054}
2055
2056static PyObject *
2057elementiter_next(ElementIterObject *it)
2058{
2059 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002060 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002061 * A short note on gettext: this function serves both the iter() and
2062 * itertext() methods to avoid code duplication. However, there are a few
2063 * small differences in the way these iterations work. Namely:
2064 * - itertext() only yields text from nodes that have it, and continues
2065 * iterating when a node doesn't have text (so it doesn't return any
2066 * node like iter())
2067 * - itertext() also has to handle tail, after finishing with all the
2068 * children of a node.
2069 */
Eli Bendersky113da642012-06-15 07:52:49 +03002070 ElementObject *cur_parent;
2071 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002072 int rc;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002073
2074 while (1) {
2075 /* Handle the case reached in the beginning and end of iteration, where
2076 * the parent stack is empty. The root_done flag gives us indication
2077 * whether we've just started iterating (so root_done is 0), in which
2078 * case the root is returned. If root_done is 1 and we're here, the
2079 * iterator is exhausted.
2080 */
2081 if (!it->parent_stack->parent) {
2082 if (it->root_done) {
2083 PyErr_SetNone(PyExc_StopIteration);
2084 return NULL;
2085 } else {
2086 it->parent_stack = parent_stack_push_new(it->parent_stack,
2087 it->root_element);
2088 if (!it->parent_stack) {
2089 PyErr_NoMemory();
2090 return NULL;
2091 }
2092
2093 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002094 rc = (it->sought_tag == Py_None);
2095 if (!rc) {
2096 rc = PyObject_RichCompareBool(it->root_element->tag,
2097 it->sought_tag, Py_EQ);
2098 if (rc < 0)
2099 return NULL;
2100 }
2101 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002102 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002103 PyObject *text = element_get_text(it->root_element);
2104 if (!text)
2105 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002106 rc = PyObject_IsTrue(text);
2107 if (rc < 0)
2108 return NULL;
2109 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002110 Py_INCREF(text);
2111 return text;
2112 }
2113 } else {
2114 Py_INCREF(it->root_element);
2115 return (PyObject *)it->root_element;
2116 }
2117 }
2118 }
2119 }
2120
2121 /* See if there are children left to traverse in the current parent. If
2122 * yes, visit the next child. If not, pop the stack and try again.
2123 */
Eli Bendersky113da642012-06-15 07:52:49 +03002124 cur_parent = it->parent_stack->parent;
2125 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002126 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2127 ElementObject *child = (ElementObject *)
2128 cur_parent->extra->children[child_index];
2129 it->parent_stack->child_index++;
2130 it->parent_stack = parent_stack_push_new(it->parent_stack,
2131 child);
2132 if (!it->parent_stack) {
2133 PyErr_NoMemory();
2134 return NULL;
2135 }
2136
2137 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002138 PyObject *text = element_get_text(child);
2139 if (!text)
2140 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002141 rc = PyObject_IsTrue(text);
2142 if (rc < 0)
2143 return NULL;
2144 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002145 Py_INCREF(text);
2146 return text;
2147 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002148 } else {
2149 rc = (it->sought_tag == Py_None);
2150 if (!rc) {
2151 rc = PyObject_RichCompareBool(child->tag,
2152 it->sought_tag, Py_EQ);
2153 if (rc < 0)
2154 return NULL;
2155 }
2156 if (rc) {
2157 Py_INCREF(child);
2158 return (PyObject *)child;
2159 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002161 }
2162 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002163 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002164 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002165 if (it->gettext) {
2166 tail = element_get_tail(cur_parent);
2167 if (!tail)
2168 return NULL;
2169 }
2170 else
2171 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002172 Py_XDECREF(it->parent_stack->parent);
2173 PyObject_Free(it->parent_stack);
2174 it->parent_stack = next;
2175
2176 /* Note that extra condition on it->parent_stack->parent here;
2177 * this is because itertext() is supposed to only return *inner*
2178 * text, not text following the element it began iteration with.
2179 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002180 if (it->parent_stack->parent) {
2181 rc = PyObject_IsTrue(tail);
2182 if (rc < 0)
2183 return NULL;
2184 if (rc) {
2185 Py_INCREF(tail);
2186 return tail;
2187 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 }
2189 }
2190 }
2191
2192 return NULL;
2193}
2194
2195
2196static PyTypeObject ElementIter_Type = {
2197 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002198 /* Using the module's name since the pure-Python implementation does not
2199 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002200 "_elementtree._element_iterator", /* tp_name */
2201 sizeof(ElementIterObject), /* tp_basicsize */
2202 0, /* tp_itemsize */
2203 /* methods */
2204 (destructor)elementiter_dealloc, /* tp_dealloc */
2205 0, /* tp_print */
2206 0, /* tp_getattr */
2207 0, /* tp_setattr */
2208 0, /* tp_reserved */
2209 0, /* tp_repr */
2210 0, /* tp_as_number */
2211 0, /* tp_as_sequence */
2212 0, /* tp_as_mapping */
2213 0, /* tp_hash */
2214 0, /* tp_call */
2215 0, /* tp_str */
2216 0, /* tp_getattro */
2217 0, /* tp_setattro */
2218 0, /* tp_as_buffer */
2219 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2220 0, /* tp_doc */
2221 (traverseproc)elementiter_traverse, /* tp_traverse */
2222 0, /* tp_clear */
2223 0, /* tp_richcompare */
2224 0, /* tp_weaklistoffset */
2225 PyObject_SelfIter, /* tp_iter */
2226 (iternextfunc)elementiter_next, /* tp_iternext */
2227 0, /* tp_methods */
2228 0, /* tp_members */
2229 0, /* tp_getset */
2230 0, /* tp_base */
2231 0, /* tp_dict */
2232 0, /* tp_descr_get */
2233 0, /* tp_descr_set */
2234 0, /* tp_dictoffset */
2235 0, /* tp_init */
2236 0, /* tp_alloc */
2237 0, /* tp_new */
2238};
2239
2240
2241static PyObject *
2242create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2243{
2244 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002245
2246 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2247 if (!it)
2248 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002249
Victor Stinner4d463432013-07-11 23:05:03 +02002250 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002251 it->sought_tag = tag;
2252 it->root_done = 0;
2253 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002254 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002255 it->root_element = self;
2256
Eli Bendersky64d11e62012-06-15 07:42:50 +03002257 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002258
2259 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2260 if (it->parent_stack == NULL) {
2261 Py_DECREF(it);
2262 PyErr_NoMemory();
2263 return NULL;
2264 }
2265 it->parent_stack->parent = NULL;
2266 it->parent_stack->child_index = 0;
2267 it->parent_stack->next = NULL;
2268
Eli Bendersky64d11e62012-06-15 07:42:50 +03002269 return (PyObject *)it;
2270}
2271
2272
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002273/* ==================================================================== */
2274/* the tree builder type */
2275
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* callable invoked as element_factory(tag, attrib) to create nodes;
       when NULL or Py_None, create_new_element() is used instead */
    PyObject *element_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
} TreeBuilderObject;
2298
/* True only when op's type is exactly TreeBuilder_Type (subclasses do not
   match). */
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002300
2301/* -------------------------------------------------------------------- */
2302/* constructor and destructor */
2303
Eli Bendersky58d548d2012-05-29 15:45:16 +03002304static PyObject *
2305treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002306{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002307 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2308 if (t != NULL) {
2309 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002310
Eli Bendersky58d548d2012-05-29 15:45:16 +03002311 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002312 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002313 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002314 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002315
Eli Bendersky58d548d2012-05-29 15:45:16 +03002316 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002317 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002318 t->stack = PyList_New(20);
2319 if (!t->stack) {
2320 Py_DECREF(t->this);
2321 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002322 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002323 return NULL;
2324 }
2325 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002326
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002327 t->events_append = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002328 t->start_event_obj = t->end_event_obj = NULL;
2329 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2330 }
2331 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002332}
2333
Serhiy Storchakacb985562015-05-04 15:32:48 +03002334/*[clinic input]
2335_elementtree.TreeBuilder.__init__
Eli Bendersky48d358b2012-05-30 17:57:50 +03002336
Serhiy Storchakacb985562015-05-04 15:32:48 +03002337 element_factory: object = NULL
2338
2339[clinic start generated code]*/
2340
2341static int
2342_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2343 PyObject *element_factory)
2344/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
2345{
2346 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002347
2348 if (element_factory) {
2349 Py_INCREF(element_factory);
Serhiy Storchakacb985562015-05-04 15:32:48 +03002350 tmp = self->element_factory;
2351 self->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002352 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002353 }
2354
Eli Bendersky58d548d2012-05-29 15:45:16 +03002355 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002356}
2357
Eli Bendersky48d358b2012-05-30 17:57:50 +03002358static int
2359treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2360{
2361 Py_VISIT(self->root);
2362 Py_VISIT(self->this);
2363 Py_VISIT(self->last);
2364 Py_VISIT(self->data);
2365 Py_VISIT(self->stack);
2366 Py_VISIT(self->element_factory);
2367 return 0;
2368}
2369
/* Release every object reference held by the tree builder.  Also called
   from treebuilder_dealloc().  Py_CLEAR() resets each field to NULL before
   dropping the reference, so calling this more than once is harmless.
   Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event tracing state */
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree construction state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->this);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002386
Eli Bendersky48d358b2012-05-30 17:57:50 +03002387static void
2388treebuilder_dealloc(TreeBuilderObject *self)
2389{
2390 PyObject_GC_UnTrack(self);
2391 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002392 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002393}
2394
2395/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002396/* helpers for handling of arbitrary element-like objects */
2397
2398static int
2399treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2400 PyObject **dest, _Py_Identifier *name)
2401{
2402 if (Element_CheckExact(element)) {
2403 Py_DECREF(JOIN_OBJ(*dest));
2404 *dest = JOIN_SET(data, PyList_CheckExact(data));
2405 return 0;
2406 }
2407 else {
2408 PyObject *joined = list_join(data);
2409 int r;
2410 if (joined == NULL)
2411 return -1;
2412 r = _PyObject_SetAttrId(element, name, joined);
2413 Py_DECREF(joined);
2414 return r;
2415 }
2416}
2417
2418/* These two functions steal a reference to data */
2419static int
2420treebuilder_set_element_text(PyObject *element, PyObject *data)
2421{
2422 _Py_IDENTIFIER(text);
2423 return treebuilder_set_element_text_or_tail(
2424 element, data, &((ElementObject *) element)->text, &PyId_text);
2425}
2426
2427static int
2428treebuilder_set_element_tail(PyObject *element, PyObject *data)
2429{
2430 _Py_IDENTIFIER(tail);
2431 return treebuilder_set_element_text_or_tail(
2432 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2433}
2434
2435static int
2436treebuilder_add_subelement(PyObject *element, PyObject *child)
2437{
2438 _Py_IDENTIFIER(append);
2439 if (Element_CheckExact(element)) {
2440 ElementObject *elem = (ElementObject *) element;
2441 return element_add_subelement(elem, child);
2442 }
2443 else {
2444 PyObject *res;
2445 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2446 if (res == NULL)
2447 return -1;
2448 Py_DECREF(res);
2449 return 0;
2450 }
2451}
2452
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002453LOCAL(int)
2454treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2455 PyObject *node)
2456{
2457 if (action != NULL) {
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02002458 PyObject *res;
2459 PyObject *event = PyTuple_Pack(2, action, node);
2460 if (event == NULL)
2461 return -1;
2462 res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
2463 Py_DECREF(event);
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002464 if (res == NULL)
2465 return -1;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002466 Py_DECREF(res);
2467 }
2468 return 0;
2469}
2470
Antoine Pitrouee329312012-10-04 19:53:29 +02002471/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472/* handlers */
2473
2474LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002475treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2476 PyObject* attrib)
2477{
2478 PyObject* node;
2479 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002480 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002481
2482 if (self->data) {
2483 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002484 if (treebuilder_set_element_text(self->last, self->data))
2485 return NULL;
2486 }
2487 else {
2488 if (treebuilder_set_element_tail(self->last, self->data))
2489 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002490 }
2491 self->data = NULL;
2492 }
2493
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002494 if (!self->element_factory || self->element_factory == Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002495 node = create_new_element(tag, attrib);
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002496 } else if (attrib == Py_None) {
2497 attrib = PyDict_New();
2498 if (!attrib)
2499 return NULL;
2500 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2501 Py_DECREF(attrib);
2502 }
2503 else {
2504 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002505 }
2506 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002507 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002508 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509
Antoine Pitrouee329312012-10-04 19:53:29 +02002510 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002511
2512 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002513 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002514 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002515 } else {
2516 if (self->root) {
2517 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002518 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002519 "multiple elements on top level"
2520 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002521 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002522 }
2523 Py_INCREF(node);
2524 self->root = node;
2525 }
2526
2527 if (self->index < PyList_GET_SIZE(self->stack)) {
2528 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002529 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530 Py_INCREF(this);
2531 } else {
2532 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002533 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534 }
2535 self->index++;
2536
2537 Py_DECREF(this);
2538 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002539 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002540
2541 Py_DECREF(self->last);
2542 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002543 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002544
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002545 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2546 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002547
2548 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002549
2550 error:
2551 Py_DECREF(node);
2552 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553}
2554
2555LOCAL(PyObject*)
2556treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2557{
2558 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002559 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002560 /* ignore calls to data before the first call to start */
2561 Py_RETURN_NONE;
2562 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002563 /* store the first item as is */
2564 Py_INCREF(data); self->data = data;
2565 } else {
2566 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002567 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2568 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002569 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002570 /* expat often generates single character data sections; handle
2571 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002572 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2573 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002574 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002575 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002576 } else if (PyList_CheckExact(self->data)) {
2577 if (PyList_Append(self->data, data) < 0)
2578 return NULL;
2579 } else {
2580 PyObject* list = PyList_New(2);
2581 if (!list)
2582 return NULL;
2583 PyList_SET_ITEM(list, 0, self->data);
2584 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2585 self->data = list;
2586 }
2587 }
2588
2589 Py_RETURN_NONE;
2590}
2591
2592LOCAL(PyObject*)
2593treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2594{
2595 PyObject* item;
2596
2597 if (self->data) {
2598 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002599 if (treebuilder_set_element_text(self->last, self->data))
2600 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002601 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002602 if (treebuilder_set_element_tail(self->last, self->data))
2603 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002604 }
2605 self->data = NULL;
2606 }
2607
2608 if (self->index == 0) {
2609 PyErr_SetString(
2610 PyExc_IndexError,
2611 "pop from empty stack"
2612 );
2613 return NULL;
2614 }
2615
2616 self->index--;
2617
2618 item = PyList_GET_ITEM(self->stack, self->index);
2619 Py_INCREF(item);
2620
2621 Py_DECREF(self->last);
2622
Antoine Pitrouee329312012-10-04 19:53:29 +02002623 self->last = self->this;
2624 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002625
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002626 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2627 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002628
2629 Py_INCREF(self->last);
2630 return (PyObject*) self->last;
2631}
2632
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002633/* -------------------------------------------------------------------- */
2634/* methods (in alphabetical order) */
2635
Serhiy Storchakacb985562015-05-04 15:32:48 +03002636/*[clinic input]
2637_elementtree.TreeBuilder.data
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002638
Serhiy Storchakacb985562015-05-04 15:32:48 +03002639 data: object
2640 /
2641
2642[clinic start generated code]*/
2643
2644static PyObject *
2645_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2646/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2647{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002648 return treebuilder_handle_data(self, data);
2649}
2650
Serhiy Storchakacb985562015-05-04 15:32:48 +03002651/*[clinic input]
2652_elementtree.TreeBuilder.end
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002653
Serhiy Storchakacb985562015-05-04 15:32:48 +03002654 tag: object
2655 /
2656
2657[clinic start generated code]*/
2658
2659static PyObject *
2660_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2661/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2662{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002663 return treebuilder_handle_end(self, tag);
2664}
2665
2666LOCAL(PyObject*)
2667treebuilder_done(TreeBuilderObject* self)
2668{
2669 PyObject* res;
2670
2671 /* FIXME: check stack size? */
2672
2673 if (self->root)
2674 res = self->root;
2675 else
2676 res = Py_None;
2677
2678 Py_INCREF(res);
2679 return res;
2680}
2681
Serhiy Storchakacb985562015-05-04 15:32:48 +03002682/*[clinic input]
2683_elementtree.TreeBuilder.close
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002684
Serhiy Storchakacb985562015-05-04 15:32:48 +03002685[clinic start generated code]*/
2686
2687static PyObject *
2688_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
2689/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
2690{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002691 return treebuilder_done(self);
2692}
2693
Serhiy Storchakacb985562015-05-04 15:32:48 +03002694/*[clinic input]
2695_elementtree.TreeBuilder.start
2696
2697 tag: object
2698 attrs: object = None
2699 /
2700
2701[clinic start generated code]*/
2702
2703static PyObject *
2704_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
2705 PyObject *attrs)
2706/*[clinic end generated code: output=e7e9dc2861349411 input=95fc1758dd042c65]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002707{
Serhiy Storchakacb985562015-05-04 15:32:48 +03002708 return treebuilder_handle_start(self, tag, attrs);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002709}
2710
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002711/* ==================================================================== */
2712/* the expat interface */
2713
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002714#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002715#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002716
2717/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2718 * cached globally without being in per-module state.
2719 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002720static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002721#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002722
Eli Bendersky52467b12012-06-01 07:13:08 +03002723static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2724 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2725
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002726typedef struct {
2727 PyObject_HEAD
2728
2729 XML_Parser parser;
2730
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002731 PyObject *target;
2732 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002733
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002734 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002736 PyObject *handle_start;
2737 PyObject *handle_data;
2738 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002739
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002740 PyObject *handle_comment;
2741 PyObject *handle_pi;
2742 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002744 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002745
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002746} XMLParserObject;
2747
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03002748static PyObject*
2749_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args);
2750static PyObject *
2751_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
2752 PyObject *pubid, PyObject *system);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002753
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754/* helpers */
2755
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756LOCAL(PyObject*)
2757makeuniversal(XMLParserObject* self, const char* string)
2758{
2759 /* convert a UTF-8 tag/attribute name from the expat parser
2760 to a universal name string */
2761
Antoine Pitrouc1948842012-10-01 23:40:37 +02002762 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002763 PyObject* key;
2764 PyObject* value;
2765
2766 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002767 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002768 if (!key)
2769 return NULL;
2770
2771 value = PyDict_GetItem(self->names, key);
2772
2773 if (value) {
2774 Py_INCREF(value);
2775 } else {
2776 /* new name. convert to universal name, and decode as
2777 necessary */
2778
2779 PyObject* tag;
2780 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002781 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002782
2783 /* look for namespace separator */
2784 for (i = 0; i < size; i++)
2785 if (string[i] == '}')
2786 break;
2787 if (i != size) {
2788 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002789 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002790 if (tag == NULL) {
2791 Py_DECREF(key);
2792 return NULL;
2793 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002794 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002795 p[0] = '{';
2796 memcpy(p+1, string, size);
2797 size++;
2798 } else {
2799 /* plain name; use key as tag */
2800 Py_INCREF(key);
2801 tag = key;
2802 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002803
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002804 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002805 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002806 value = PyUnicode_DecodeUTF8(p, size, "strict");
2807 Py_DECREF(tag);
2808 if (!value) {
2809 Py_DECREF(key);
2810 return NULL;
2811 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812
2813 /* add to names dictionary */
2814 if (PyDict_SetItem(self->names, key, value) < 0) {
2815 Py_DECREF(key);
2816 Py_DECREF(value);
2817 return NULL;
2818 }
2819 }
2820
2821 Py_DECREF(key);
2822 return value;
2823}
2824
Eli Bendersky5b77d812012-03-16 08:20:05 +02002825/* Set the ParseError exception with the given parameters.
2826 * If message is not NULL, it's used as the error string. Otherwise, the
2827 * message string is the default for the given error_code.
2828*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002829static void
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002830expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
2831 const char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002832{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002833 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002834 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002835
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002836 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002837 message ? message : EXPAT(ErrorString)(error_code),
2838 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002839 if (errmsg == NULL)
2840 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002841
Eli Bendersky532d03e2013-08-10 08:00:39 -07002842 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002843 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002844 if (!error)
2845 return;
2846
Eli Bendersky5b77d812012-03-16 08:20:05 +02002847 /* Add code and position attributes */
2848 code = PyLong_FromLong((long)error_code);
2849 if (!code) {
2850 Py_DECREF(error);
2851 return;
2852 }
2853 if (PyObject_SetAttrString(error, "code", code) == -1) {
2854 Py_DECREF(error);
2855 Py_DECREF(code);
2856 return;
2857 }
2858 Py_DECREF(code);
2859
Serhiy Storchaka26861b02015-02-16 20:52:17 +02002860 position = Py_BuildValue("(nn)", line, column);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002861 if (!position) {
2862 Py_DECREF(error);
2863 return;
2864 }
2865 if (PyObject_SetAttrString(error, "position", position) == -1) {
2866 Py_DECREF(error);
2867 Py_DECREF(position);
2868 return;
2869 }
2870 Py_DECREF(position);
2871
Eli Bendersky532d03e2013-08-10 08:00:39 -07002872 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002873 Py_DECREF(error);
2874}
2875
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002876/* -------------------------------------------------------------------- */
2877/* handlers */
2878
2879static void
2880expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2881 int data_len)
2882{
2883 PyObject* key;
2884 PyObject* value;
2885 PyObject* res;
2886
2887 if (data_len < 2 || data_in[0] != '&')
2888 return;
2889
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002890 if (PyErr_Occurred())
2891 return;
2892
Neal Norwitz0269b912007-08-08 06:56:02 +00002893 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002894 if (!key)
2895 return;
2896
2897 value = PyDict_GetItem(self->entity, key);
2898
2899 if (value) {
2900 if (TreeBuilder_CheckExact(self->target))
2901 res = treebuilder_handle_data(
2902 (TreeBuilderObject*) self->target, value
2903 );
2904 else if (self->handle_data)
2905 res = PyObject_CallFunction(self->handle_data, "O", value);
2906 else
2907 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002908 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002909 } else if (!PyErr_Occurred()) {
2910 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002911 char message[128] = "undefined entity ";
2912 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002913 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002914 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002915 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002916 EXPAT(GetErrorColumnNumber)(self->parser),
2917 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002918 );
2919 }
2920
2921 Py_DECREF(key);
2922}
2923
2924static void
2925expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2926 const XML_Char **attrib_in)
2927{
2928 PyObject* res;
2929 PyObject* tag;
2930 PyObject* attrib;
2931 int ok;
2932
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002933 if (PyErr_Occurred())
2934 return;
2935
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002936 /* tag name */
2937 tag = makeuniversal(self, tag_in);
2938 if (!tag)
2939 return; /* parser will look for errors */
2940
2941 /* attributes */
2942 if (attrib_in[0]) {
2943 attrib = PyDict_New();
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002944 if (!attrib) {
2945 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002946 return;
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002947 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002948 while (attrib_in[0] && attrib_in[1]) {
2949 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002950 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002951 if (!key || !value) {
2952 Py_XDECREF(value);
2953 Py_XDECREF(key);
2954 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002955 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002956 return;
2957 }
2958 ok = PyDict_SetItem(attrib, key, value);
2959 Py_DECREF(value);
2960 Py_DECREF(key);
2961 if (ok < 0) {
2962 Py_DECREF(attrib);
Serhiy Storchakaa29eb082015-12-09 19:44:30 +02002963 Py_DECREF(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002964 return;
2965 }
2966 attrib_in += 2;
2967 }
2968 } else {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002969 Py_INCREF(Py_None);
2970 attrib = Py_None;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002971 }
2972
2973 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002974 /* shortcut */
2975 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2976 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002977 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002978 else if (self->handle_start) {
Serhiy Storchaka36ff9972015-12-10 09:51:53 +02002979 if (attrib == Py_None) {
2980 Py_DECREF(attrib);
2981 attrib = PyDict_New();
2982 if (!attrib) {
2983 Py_DECREF(tag);
2984 return;
2985 }
2986 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002987 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002988 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002989 res = NULL;
2990
2991 Py_DECREF(tag);
2992 Py_DECREF(attrib);
2993
2994 Py_XDECREF(res);
2995}
2996
2997static void
2998expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
2999 int data_len)
3000{
3001 PyObject* data;
3002 PyObject* res;
3003
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003004 if (PyErr_Occurred())
3005 return;
3006
Neal Norwitz0269b912007-08-08 06:56:02 +00003007 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003008 if (!data)
3009 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003010
3011 if (TreeBuilder_CheckExact(self->target))
3012 /* shortcut */
3013 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3014 else if (self->handle_data)
3015 res = PyObject_CallFunction(self->handle_data, "O", data);
3016 else
3017 res = NULL;
3018
3019 Py_DECREF(data);
3020
3021 Py_XDECREF(res);
3022}
3023
3024static void
3025expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3026{
3027 PyObject* tag;
3028 PyObject* res = NULL;
3029
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003030 if (PyErr_Occurred())
3031 return;
3032
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003033 if (TreeBuilder_CheckExact(self->target))
3034 /* shortcut */
3035 /* the standard tree builder doesn't look at the end tag */
3036 res = treebuilder_handle_end(
3037 (TreeBuilderObject*) self->target, Py_None
3038 );
3039 else if (self->handle_end) {
3040 tag = makeuniversal(self, tag_in);
3041 if (tag) {
3042 res = PyObject_CallFunction(self->handle_end, "O", tag);
3043 Py_DECREF(tag);
3044 }
3045 }
3046
3047 Py_XDECREF(res);
3048}
3049
3050static void
3051expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3052 const XML_Char *uri)
3053{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003054 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3055 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003056
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003057 if (PyErr_Occurred())
3058 return;
3059
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003060 if (!target->events_append || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003061 return;
3062
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003063 if (!uri)
3064 uri = "";
3065 if (!prefix)
3066 prefix = "";
3067
3068 parcel = Py_BuildValue("ss", prefix, uri);
3069 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003070 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003071 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3072 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003073}
3074
3075static void
3076expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3077{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003078 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3079
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003080 if (PyErr_Occurred())
3081 return;
3082
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003083 if (!target->events_append)
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003084 return;
3085
3086 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003087}
3088
3089static void
3090expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3091{
3092 PyObject* comment;
3093 PyObject* res;
3094
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003095 if (PyErr_Occurred())
3096 return;
3097
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003098 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003099 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003100 if (comment) {
3101 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3102 Py_XDECREF(res);
3103 Py_DECREF(comment);
3104 }
3105 }
3106}
3107
Eli Bendersky45839902013-01-13 05:14:47 -08003108static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003109expat_start_doctype_handler(XMLParserObject *self,
3110 const XML_Char *doctype_name,
3111 const XML_Char *sysid,
3112 const XML_Char *pubid,
3113 int has_internal_subset)
3114{
3115 PyObject *self_pyobj = (PyObject *)self;
3116 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3117 PyObject *parser_doctype = NULL;
3118 PyObject *res = NULL;
3119
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003120 if (PyErr_Occurred())
3121 return;
3122
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003123 doctype_name_obj = makeuniversal(self, doctype_name);
3124 if (!doctype_name_obj)
3125 return;
3126
3127 if (sysid) {
3128 sysid_obj = makeuniversal(self, sysid);
3129 if (!sysid_obj) {
3130 Py_DECREF(doctype_name_obj);
3131 return;
3132 }
3133 } else {
3134 Py_INCREF(Py_None);
3135 sysid_obj = Py_None;
3136 }
3137
3138 if (pubid) {
3139 pubid_obj = makeuniversal(self, pubid);
3140 if (!pubid_obj) {
3141 Py_DECREF(doctype_name_obj);
3142 Py_DECREF(sysid_obj);
3143 return;
3144 }
3145 } else {
3146 Py_INCREF(Py_None);
3147 pubid_obj = Py_None;
3148 }
3149
3150 /* If the target has a handler for doctype, call it. */
3151 if (self->handle_doctype) {
3152 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3153 doctype_name_obj, pubid_obj, sysid_obj);
3154 Py_CLEAR(res);
3155 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003156 else {
3157 /* Now see if the parser itself has a doctype method. If yes and it's
3158 * a custom method, call it but warn about deprecation. If it's only
3159 * the vanilla XMLParser method, do nothing.
3160 */
3161 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3162 if (parser_doctype &&
3163 !(PyCFunction_Check(parser_doctype) &&
3164 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3165 PyCFunction_GET_FUNCTION(parser_doctype) ==
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003166 (PyCFunction) _elementtree_XMLParser_doctype)) {
3167 res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj,
3168 pubid_obj, sysid_obj);
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003169 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003170 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003171 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003172 res = PyObject_CallFunction(parser_doctype, "OOO",
3173 doctype_name_obj, pubid_obj, sysid_obj);
3174 Py_CLEAR(res);
3175 }
3176 }
3177
3178clear:
3179 Py_XDECREF(parser_doctype);
3180 Py_DECREF(doctype_name_obj);
3181 Py_DECREF(pubid_obj);
3182 Py_DECREF(sysid_obj);
3183}
3184
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003185static void
3186expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3187 const XML_Char* data_in)
3188{
3189 PyObject* target;
3190 PyObject* data;
3191 PyObject* res;
3192
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003193 if (PyErr_Occurred())
3194 return;
3195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003196 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003197 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3198 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003199 if (target && data) {
3200 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3201 Py_XDECREF(res);
3202 Py_DECREF(data);
3203 Py_DECREF(target);
3204 } else {
3205 Py_XDECREF(data);
3206 Py_XDECREF(target);
3207 }
3208 }
3209}
3210
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003211/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003212
Eli Bendersky52467b12012-06-01 07:13:08 +03003213static PyObject *
3214xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003215{
Eli Bendersky52467b12012-06-01 07:13:08 +03003216 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3217 if (self) {
3218 self->parser = NULL;
3219 self->target = self->entity = self->names = NULL;
3220 self->handle_start = self->handle_data = self->handle_end = NULL;
3221 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003222 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003223 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003224 return (PyObject *)self;
3225}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003226
Serhiy Storchakacb985562015-05-04 15:32:48 +03003227/*[clinic input]
3228_elementtree.XMLParser.__init__
3229
3230 html: object = NULL
3231 target: object = NULL
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003232 encoding: str(accept={str, NoneType}) = NULL
Serhiy Storchakacb985562015-05-04 15:32:48 +03003233
3234[clinic start generated code]*/
3235
Eli Bendersky52467b12012-06-01 07:13:08 +03003236static int
Serhiy Storchakacb985562015-05-04 15:32:48 +03003237_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
3238 PyObject *target, const char *encoding)
Larry Hastingsdbfdc382015-05-04 06:59:46 -07003239/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
Eli Bendersky52467b12012-06-01 07:13:08 +03003240{
Serhiy Storchakacb985562015-05-04 15:32:48 +03003241 self->entity = PyDict_New();
3242 if (!self->entity)
3243 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003244
Serhiy Storchakacb985562015-05-04 15:32:48 +03003245 self->names = PyDict_New();
3246 if (!self->names) {
3247 Py_CLEAR(self->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003248 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003249 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003250
Serhiy Storchakacb985562015-05-04 15:32:48 +03003251 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3252 if (!self->parser) {
3253 Py_CLEAR(self->entity);
3254 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003255 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003256 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003257 }
3258
Eli Bendersky52467b12012-06-01 07:13:08 +03003259 if (target) {
3260 Py_INCREF(target);
3261 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003262 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003263 if (!target) {
Serhiy Storchakacb985562015-05-04 15:32:48 +03003264 Py_CLEAR(self->entity);
3265 Py_CLEAR(self->names);
3266 EXPAT(ParserFree)(self->parser);
Eli Bendersky52467b12012-06-01 07:13:08 +03003267 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003268 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003269 }
Serhiy Storchakacb985562015-05-04 15:32:48 +03003270 self->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003271
Serhiy Storchakacb985562015-05-04 15:32:48 +03003272 self->handle_start = PyObject_GetAttrString(target, "start");
3273 self->handle_data = PyObject_GetAttrString(target, "data");
3274 self->handle_end = PyObject_GetAttrString(target, "end");
3275 self->handle_comment = PyObject_GetAttrString(target, "comment");
3276 self->handle_pi = PyObject_GetAttrString(target, "pi");
3277 self->handle_close = PyObject_GetAttrString(target, "close");
3278 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003279
3280 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003282 /* configure parser */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003283 EXPAT(SetUserData)(self->parser, self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003284 EXPAT(SetElementHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003285 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 (XML_StartElementHandler) expat_start_handler,
3287 (XML_EndElementHandler) expat_end_handler
3288 );
3289 EXPAT(SetDefaultHandlerExpand)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003290 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003291 (XML_DefaultHandler) expat_default_handler
3292 );
3293 EXPAT(SetCharacterDataHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003294 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295 (XML_CharacterDataHandler) expat_data_handler
3296 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003297 if (self->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003298 EXPAT(SetCommentHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003299 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003300 (XML_CommentHandler) expat_comment_handler
3301 );
Serhiy Storchakacb985562015-05-04 15:32:48 +03003302 if (self->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003303 EXPAT(SetProcessingInstructionHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003304 self->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003305 (XML_ProcessingInstructionHandler) expat_pi_handler
3306 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003307 EXPAT(SetStartDoctypeDeclHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003308 self->parser,
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003309 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3310 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003311 EXPAT(SetUnknownEncodingHandler)(
Serhiy Storchakacb985562015-05-04 15:32:48 +03003312 self->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003313 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003314 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315
Eli Bendersky52467b12012-06-01 07:13:08 +03003316 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003317}
3318
Eli Bendersky52467b12012-06-01 07:13:08 +03003319static int
3320xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3321{
3322 Py_VISIT(self->handle_close);
3323 Py_VISIT(self->handle_pi);
3324 Py_VISIT(self->handle_comment);
3325 Py_VISIT(self->handle_end);
3326 Py_VISIT(self->handle_data);
3327 Py_VISIT(self->handle_start);
3328
3329 Py_VISIT(self->target);
3330 Py_VISIT(self->entity);
3331 Py_VISIT(self->names);
3332
3333 return 0;
3334}
3335
3336static int
3337xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003338{
3339 EXPAT(ParserFree)(self->parser);
3340
Antoine Pitrouc1948842012-10-01 23:40:37 +02003341 Py_CLEAR(self->handle_close);
3342 Py_CLEAR(self->handle_pi);
3343 Py_CLEAR(self->handle_comment);
3344 Py_CLEAR(self->handle_end);
3345 Py_CLEAR(self->handle_data);
3346 Py_CLEAR(self->handle_start);
3347 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003348
Antoine Pitrouc1948842012-10-01 23:40:37 +02003349 Py_CLEAR(self->target);
3350 Py_CLEAR(self->entity);
3351 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352
Eli Bendersky52467b12012-06-01 07:13:08 +03003353 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003354}
3355
Eli Bendersky52467b12012-06-01 07:13:08 +03003356static void
3357xmlparser_dealloc(XMLParserObject* self)
3358{
3359 PyObject_GC_UnTrack(self);
3360 xmlparser_gc_clear(self);
3361 Py_TYPE(self)->tp_free((PyObject *)self);
3362}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003363
3364LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003365expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003366{
3367 int ok;
3368
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003369 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3371
3372 if (PyErr_Occurred())
3373 return NULL;
3374
3375 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003376 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003377 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003378 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003379 EXPAT(GetErrorColumnNumber)(self->parser),
3380 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003381 );
3382 return NULL;
3383 }
3384
3385 Py_RETURN_NONE;
3386}
3387
Serhiy Storchakacb985562015-05-04 15:32:48 +03003388/*[clinic input]
3389_elementtree.XMLParser.close
3390
3391[clinic start generated code]*/
3392
3393static PyObject *
3394_elementtree_XMLParser_close_impl(XMLParserObject *self)
3395/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003396{
3397 /* end feeding data to parser */
3398
3399 PyObject* res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003400 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003401 if (!res)
3402 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003403
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003404 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003405 Py_DECREF(res);
3406 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003407 }
3408 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003409 Py_DECREF(res);
3410 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003411 }
3412 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003413 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003414 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003415}
3416
Serhiy Storchakacb985562015-05-04 15:32:48 +03003417/*[clinic input]
3418_elementtree.XMLParser.feed
3419
3420 data: object
3421 /
3422
3423[clinic start generated code]*/
3424
3425static PyObject *
3426_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3427/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003428{
3429 /* feed data to parser */
3430
Serhiy Storchakacb985562015-05-04 15:32:48 +03003431 if (PyUnicode_Check(data)) {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003432 Py_ssize_t data_len;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003433 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3434 if (data_ptr == NULL)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003435 return NULL;
3436 if (data_len > INT_MAX) {
3437 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3438 return NULL;
3439 }
3440 /* Explicitly set UTF-8 encoding. Return code ignored. */
3441 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
Serhiy Storchakacb985562015-05-04 15:32:48 +03003442 return expat_parse(self, data_ptr, (int)data_len, 0);
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003443 }
3444 else {
3445 Py_buffer view;
3446 PyObject *res;
Serhiy Storchakacb985562015-05-04 15:32:48 +03003447 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003448 return NULL;
3449 if (view.len > INT_MAX) {
3450 PyBuffer_Release(&view);
3451 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3452 return NULL;
3453 }
3454 res = expat_parse(self, view.buf, (int)view.len, 0);
3455 PyBuffer_Release(&view);
3456 return res;
3457 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003458}
3459
Serhiy Storchakacb985562015-05-04 15:32:48 +03003460/*[clinic input]
3461_elementtree.XMLParser._parse_whole
3462
3463 file: object
3464 /
3465
3466[clinic start generated code]*/
3467
3468static PyObject *
3469_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3470/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003471{
Eli Benderskya3699232013-05-19 18:47:23 -07003472 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003473 PyObject* reader;
3474 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003475 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003476 PyObject* res;
3477
Serhiy Storchakacb985562015-05-04 15:32:48 +03003478 reader = PyObject_GetAttrString(file, "read");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003479 if (!reader)
3480 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003481
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003482 /* read from open file object */
3483 for (;;) {
3484
3485 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3486
3487 if (!buffer) {
3488 /* read failed (e.g. due to KeyboardInterrupt) */
3489 Py_DECREF(reader);
3490 return NULL;
3491 }
3492
Eli Benderskyf996e772012-03-16 05:53:30 +02003493 if (PyUnicode_CheckExact(buffer)) {
3494 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003495 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003496 Py_DECREF(buffer);
3497 break;
3498 }
3499 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003500 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003501 if (!temp) {
3502 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003503 Py_DECREF(reader);
3504 return NULL;
3505 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003506 buffer = temp;
3507 }
3508 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003509 Py_DECREF(buffer);
3510 break;
3511 }
3512
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003513 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3514 Py_DECREF(buffer);
3515 Py_DECREF(reader);
3516 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3517 return NULL;
3518 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003519 res = expat_parse(
Serhiy Storchaka26861b02015-02-16 20:52:17 +02003520 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003521 );
3522
3523 Py_DECREF(buffer);
3524
3525 if (!res) {
3526 Py_DECREF(reader);
3527 return NULL;
3528 }
3529 Py_DECREF(res);
3530
3531 }
3532
3533 Py_DECREF(reader);
3534
3535 res = expat_parse(self, "", 0, 1);
3536
3537 if (res && TreeBuilder_CheckExact(self->target)) {
3538 Py_DECREF(res);
3539 return treebuilder_done((TreeBuilderObject*) self->target);
3540 }
3541
3542 return res;
3543}
3544
Serhiy Storchakacb985562015-05-04 15:32:48 +03003545/*[clinic input]
3546_elementtree.XMLParser.doctype
3547
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003548 name: object
3549 pubid: object
3550 system: object
3551 /
3552
Serhiy Storchakacb985562015-05-04 15:32:48 +03003553[clinic start generated code]*/
3554
3555static PyObject *
Serhiy Storchaka4a01cab2015-06-29 23:08:52 +03003556_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name,
3557 PyObject *pubid, PyObject *system)
3558/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003559{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003560 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3561 "This method of XMLParser is deprecated. Define"
3562 " doctype() method on the TreeBuilder target.",
3563 1) < 0) {
3564 return NULL;
3565 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003566 Py_RETURN_NONE;
3567}
3568
Serhiy Storchakacb985562015-05-04 15:32:48 +03003569/*[clinic input]
3570_elementtree.XMLParser._setevents
3571
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003572 events_queue: object
Serhiy Storchakacb985562015-05-04 15:32:48 +03003573 events_to_report: object = None
3574 /
3575
3576[clinic start generated code]*/
3577
3578static PyObject *
3579_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
3580 PyObject *events_queue,
3581 PyObject *events_to_report)
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003582/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003583{
3584 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003585 Py_ssize_t i, seqlen;
3586 TreeBuilderObject *target;
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003587 PyObject *events_append, *events_seq;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003588
3589 if (!TreeBuilder_CheckExact(self->target)) {
3590 PyErr_SetString(
3591 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003592 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003593 "targets"
3594 );
3595 return NULL;
3596 }
3597
3598 target = (TreeBuilderObject*) self->target;
3599
Serhiy Storchaka9ec5e252015-12-07 02:31:11 +02003600 events_append = PyObject_GetAttrString(events_queue, "append");
3601 if (events_append == NULL)
3602 return NULL;
3603 Py_XDECREF(target->events_append);
3604 target->events_append = events_append;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003605
3606 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003607 Py_CLEAR(target->start_event_obj);
3608 Py_CLEAR(target->end_event_obj);
3609 Py_CLEAR(target->start_ns_event_obj);
3610 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003611
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003612 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003614 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003615 Py_RETURN_NONE;
3616 }
3617
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003618 if (!(events_seq = PySequence_Fast(events_to_report,
3619 "events must be a sequence"))) {
3620 return NULL;
3621 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003622
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003623 seqlen = PySequence_Size(events_seq);
3624 for (i = 0; i < seqlen; ++i) {
3625 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3626 char *event_name = NULL;
3627 if (PyUnicode_Check(event_name_obj)) {
3628 event_name = _PyUnicode_AsString(event_name_obj);
3629 } else if (PyBytes_Check(event_name_obj)) {
3630 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003631 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003632
3633 if (event_name == NULL) {
3634 Py_DECREF(events_seq);
3635 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3636 return NULL;
3637 } else if (strcmp(event_name, "start") == 0) {
3638 Py_INCREF(event_name_obj);
3639 target->start_event_obj = event_name_obj;
3640 } else if (strcmp(event_name, "end") == 0) {
3641 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003642 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003643 target->end_event_obj = event_name_obj;
3644 } else if (strcmp(event_name, "start-ns") == 0) {
3645 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003647 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003648 EXPAT(SetNamespaceDeclHandler)(
3649 self->parser,
3650 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3651 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3652 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003653 } else if (strcmp(event_name, "end-ns") == 0) {
3654 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003655 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003656 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003657 EXPAT(SetNamespaceDeclHandler)(
3658 self->parser,
3659 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3660 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3661 );
3662 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003663 Py_DECREF(events_seq);
3664 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665 return NULL;
3666 }
3667 }
3668
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003669 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003670 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671}
3672
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003673static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003674xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003675{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003676 if (PyUnicode_Check(nameobj)) {
3677 PyObject* res;
3678 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3679 res = self->entity;
3680 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3681 res = self->target;
3682 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3683 return PyUnicode_FromFormat(
3684 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003685 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003686 }
3687 else
3688 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003689
Alexander Belopolskye239d232010-12-08 23:31:48 +00003690 Py_INCREF(res);
3691 return res;
3692 }
3693 generic:
3694 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003695}
3696
Serhiy Storchakacb985562015-05-04 15:32:48 +03003697#include "clinic/_elementtree.c.h"
3698
3699static PyMethodDef element_methods[] = {
3700
3701 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
3702
3703 _ELEMENTTREE_ELEMENT_GET_METHODDEF
3704 _ELEMENTTREE_ELEMENT_SET_METHODDEF
3705
3706 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
3707 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
3708 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
3709
3710 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
3711 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
3712 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
3713 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
3714
3715 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
3716 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
3717 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
3718
3719 {"getiterator", (PyCFunction)_elementtree_Element_iter, METH_VARARGS|METH_KEYWORDS, _elementtree_Element_iter__doc__},
3720 _ELEMENTTREE_ELEMENT_GETCHILDREN_METHODDEF
3721
3722 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
3723 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
3724
3725 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
3726
3727 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
3728 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
3729 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
3730 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
3731 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
3732
3733 {NULL, NULL}
3734};
3735
3736static PyMappingMethods element_as_mapping = {
3737 (lenfunc) element_length,
3738 (binaryfunc) element_subscr,
3739 (objobjargproc) element_ass_subscr,
3740};
3741
Serhiy Storchakadde08152015-11-25 15:28:13 +02003742static PyGetSetDef element_getsetlist[] = {
3743 {"tag",
3744 (getter)element_tag_getter,
3745 (setter)element_tag_setter,
3746 "A string identifying what kind of data this element represents"},
3747 {"text",
3748 (getter)element_text_getter,
3749 (setter)element_text_setter,
3750 "A string of text directly after the start tag, or None"},
3751 {"tail",
3752 (getter)element_tail_getter,
3753 (setter)element_tail_setter,
3754 "A string of text directly after the end tag, or None"},
3755 {"attrib",
3756 (getter)element_attrib_getter,
3757 (setter)element_attrib_setter,
3758 "A dictionary containing the element's attributes"},
3759 {NULL},
3760};
3761
Serhiy Storchakacb985562015-05-04 15:32:48 +03003762static PyTypeObject Element_Type = {
3763 PyVarObject_HEAD_INIT(NULL, 0)
3764 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
3765 /* methods */
3766 (destructor)element_dealloc, /* tp_dealloc */
3767 0, /* tp_print */
3768 0, /* tp_getattr */
3769 0, /* tp_setattr */
3770 0, /* tp_reserved */
3771 (reprfunc)element_repr, /* tp_repr */
3772 0, /* tp_as_number */
3773 &element_as_sequence, /* tp_as_sequence */
3774 &element_as_mapping, /* tp_as_mapping */
3775 0, /* tp_hash */
3776 0, /* tp_call */
3777 0, /* tp_str */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003778 PyObject_GenericGetAttr, /* tp_getattro */
3779 0, /* tp_setattro */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003780 0, /* tp_as_buffer */
3781 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3782 /* tp_flags */
3783 0, /* tp_doc */
3784 (traverseproc)element_gc_traverse, /* tp_traverse */
3785 (inquiry)element_gc_clear, /* tp_clear */
3786 0, /* tp_richcompare */
3787 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
3788 0, /* tp_iter */
3789 0, /* tp_iternext */
3790 element_methods, /* tp_methods */
3791 0, /* tp_members */
Serhiy Storchakadde08152015-11-25 15:28:13 +02003792 element_getsetlist, /* tp_getset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003793 0, /* tp_base */
3794 0, /* tp_dict */
3795 0, /* tp_descr_get */
3796 0, /* tp_descr_set */
3797 0, /* tp_dictoffset */
3798 (initproc)element_init, /* tp_init */
3799 PyType_GenericAlloc, /* tp_alloc */
3800 element_new, /* tp_new */
3801 0, /* tp_free */
3802};
3803
3804static PyMethodDef treebuilder_methods[] = {
3805 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
3806 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
3807 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
3808 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
3809 {NULL, NULL}
3810};
3811
3812static PyTypeObject TreeBuilder_Type = {
3813 PyVarObject_HEAD_INIT(NULL, 0)
3814 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
3815 /* methods */
3816 (destructor)treebuilder_dealloc, /* tp_dealloc */
3817 0, /* tp_print */
3818 0, /* tp_getattr */
3819 0, /* tp_setattr */
3820 0, /* tp_reserved */
3821 0, /* tp_repr */
3822 0, /* tp_as_number */
3823 0, /* tp_as_sequence */
3824 0, /* tp_as_mapping */
3825 0, /* tp_hash */
3826 0, /* tp_call */
3827 0, /* tp_str */
3828 0, /* tp_getattro */
3829 0, /* tp_setattro */
3830 0, /* tp_as_buffer */
3831 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3832 /* tp_flags */
3833 0, /* tp_doc */
3834 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
3835 (inquiry)treebuilder_gc_clear, /* tp_clear */
3836 0, /* tp_richcompare */
3837 0, /* tp_weaklistoffset */
3838 0, /* tp_iter */
3839 0, /* tp_iternext */
3840 treebuilder_methods, /* tp_methods */
3841 0, /* tp_members */
3842 0, /* tp_getset */
3843 0, /* tp_base */
3844 0, /* tp_dict */
3845 0, /* tp_descr_get */
3846 0, /* tp_descr_set */
3847 0, /* tp_dictoffset */
3848 _elementtree_TreeBuilder___init__, /* tp_init */
3849 PyType_GenericAlloc, /* tp_alloc */
3850 treebuilder_new, /* tp_new */
3851 0, /* tp_free */
3852};
3853
3854static PyMethodDef xmlparser_methods[] = {
3855 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
3856 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
3857 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
3858 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
3859 _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF
3860 {NULL, NULL}
3861};
3862
Neal Norwitz227b5332006-03-22 09:28:35 +00003863static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003864 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003865 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003866 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003867 (destructor)xmlparser_dealloc, /* tp_dealloc */
3868 0, /* tp_print */
3869 0, /* tp_getattr */
3870 0, /* tp_setattr */
3871 0, /* tp_reserved */
3872 0, /* tp_repr */
3873 0, /* tp_as_number */
3874 0, /* tp_as_sequence */
3875 0, /* tp_as_mapping */
3876 0, /* tp_hash */
3877 0, /* tp_call */
3878 0, /* tp_str */
3879 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3880 0, /* tp_setattro */
3881 0, /* tp_as_buffer */
3882 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3883 /* tp_flags */
3884 0, /* tp_doc */
3885 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3886 (inquiry)xmlparser_gc_clear, /* tp_clear */
3887 0, /* tp_richcompare */
3888 0, /* tp_weaklistoffset */
3889 0, /* tp_iter */
3890 0, /* tp_iternext */
3891 xmlparser_methods, /* tp_methods */
3892 0, /* tp_members */
3893 0, /* tp_getset */
3894 0, /* tp_base */
3895 0, /* tp_dict */
3896 0, /* tp_descr_get */
3897 0, /* tp_descr_set */
3898 0, /* tp_dictoffset */
Serhiy Storchakacb985562015-05-04 15:32:48 +03003899 _elementtree_XMLParser___init__, /* tp_init */
Eli Bendersky52467b12012-06-01 07:13:08 +03003900 PyType_GenericAlloc, /* tp_alloc */
3901 xmlparser_new, /* tp_new */
3902 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003903};
3904
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003905/* ==================================================================== */
3906/* python module interface */
3907
3908static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003909 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003910 {NULL, NULL}
3911};
3912
Martin v. Löwis1a214512008-06-11 05:26:20 +00003913
Eli Bendersky532d03e2013-08-10 08:00:39 -07003914static struct PyModuleDef elementtreemodule = {
3915 PyModuleDef_HEAD_INIT,
3916 "_elementtree",
3917 NULL,
3918 sizeof(elementtreestate),
3919 _functions,
3920 NULL,
3921 elementtree_traverse,
3922 elementtree_clear,
3923 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003924};
3925
Neal Norwitzf6657e62006-12-28 04:47:50 +00003926PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003927PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003928{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003929 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003930 elementtreestate *st;
3931
3932 m = PyState_FindModule(&elementtreemodule);
3933 if (m) {
3934 Py_INCREF(m);
3935 return m;
3936 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003937
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003938 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003939 if (PyType_Ready(&ElementIter_Type) < 0)
3940 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003941 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003942 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003943 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003944 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003945 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003946 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003947
Eli Bendersky532d03e2013-08-10 08:00:39 -07003948 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003949 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003950 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003951 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003952
Eli Bendersky828efde2012-04-05 05:40:58 +03003953 if (!(temp = PyImport_ImportModule("copy")))
3954 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003955 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003956 Py_XDECREF(temp);
3957
Eli Bendersky532d03e2013-08-10 08:00:39 -07003958 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003959 return NULL;
3960
Eli Bendersky20d41742012-06-01 09:48:37 +03003961 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003962 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3963 if (expat_capi) {
3964 /* check that it's usable */
3965 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
Victor Stinner706768c2014-08-16 01:03:39 +02003966 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003967 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3968 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003969 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003970 PyErr_SetString(PyExc_ImportError,
3971 "pyexpat version is incompatible");
3972 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003973 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003974 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003975 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003976 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003977
Eli Bendersky532d03e2013-08-10 08:00:39 -07003978 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003979 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003980 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003981 Py_INCREF(st->parseerror_obj);
3982 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003983
Eli Bendersky092af1f2012-03-04 07:14:03 +02003984 Py_INCREF((PyObject *)&Element_Type);
3985 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3986
Eli Bendersky58d548d2012-05-29 15:45:16 +03003987 Py_INCREF((PyObject *)&TreeBuilder_Type);
3988 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3989
Eli Bendersky52467b12012-06-01 07:13:08 +03003990 Py_INCREF((PyObject *)&XMLParser_Type);
3991 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003992
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003993 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003994}