blob: cf819e896c3ec6834b28173e871e6c6d89b22a14 [file] [log] [blame]
/*--------------------------------------------------------------------
 * Licensed to PSF under a Contributor Agreement.
 * See http://www.python.org/psf/license for licensing details.
 *
 * _elementtree - C accelerator for xml.etree.ElementTree
 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
 * Copyright (c) 1999-2009 by Fredrik Lundh.
 *
 * info@pythonware.com
 * http://www.pythonware.com
 *--------------------------------------------------------------------
 */
13
14#include "Python.h"
Eli Benderskyebf37a22012-04-03 22:02:37 +030015#include "structmember.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000016
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000017/* -------------------------------------------------------------------- */
18/* configuration */
19
/* Number of child slots embedded directly in an element's extra block;
   elements with at most this many children need no separate child
   buffer allocation. */
#define STATIC_CHILDREN 4
23
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
33
34/* -------------------------------------------------------------------- */
35
/* Optional allocation tracing.  Change the "#if 0" below to "#if 1" to
   keep a running byte counter and print it on every ALLOC/RELEASE.  In
   the default configuration both macros expand to nothing. */
#if 0
static int memory = 0;
#define ALLOC(size, comment) \
    do { memory += size; printf("%8d - %s\n", memory, comment); } while (0)
#define RELEASE(size, comment) \
    do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#else
#define ALLOC(size, comment)
#define RELEASE(size, comment)
#endif
46
/* compiler tweaks: LOCAL(type) introduces a file-local helper returning
   `type`.  Under MSVC the helper is additionally marked __inline and
   __fastcall; everywhere else it is a plain static function. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
53
/* Macros that keep a one-bit 'join' flag in the lowest bit of a string
   object pointer.  Every use of text and tail as an object pointer must
   go through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)       - the object pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the object pointer of p combined with `flag` */
#define JOIN_OBJ(p) ((PyObject*) ((Py_uintptr_t) (p) & ~(Py_uintptr_t)1))
#define JOIN_GET(p) ((Py_uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((Py_uintptr_t) (JOIN_OBJ(p)) | (flag)))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +000061
Eli Benderskydd3661e2013-09-13 06:24:25 -070062/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
63 * reference since this function sets it to NULL.
64*/
doko@ubuntu.com0648bf72013-09-18 12:12:28 +020065static void _clear_joined_ptr(PyObject **p)
Eli Benderskydd3661e2013-09-13 06:24:25 -070066{
67 if (*p) {
68 PyObject *tmp = JOIN_OBJ(*p);
69 *p = NULL;
70 Py_DECREF(tmp);
71 }
72}
73
Ronald Oussoren138d0802013-07-19 11:11:25 +020074/* Types defined by this extension */
75static PyTypeObject Element_Type;
76static PyTypeObject ElementIter_Type;
77static PyTypeObject TreeBuilder_Type;
78static PyTypeObject XMLParser_Type;
79
80
Eli Bendersky532d03e2013-08-10 08:00:39 -070081/* Per-module state; PEP 3121 */
82typedef struct {
83 PyObject *parseerror_obj;
84 PyObject *deepcopy_obj;
85 PyObject *elementpath_obj;
86} elementtreestate;
87
88static struct PyModuleDef elementtreemodule;
89
90/* Given a module object (assumed to be _elementtree), get its per-module
91 * state.
92 */
93#define ET_STATE(mod) ((elementtreestate *) PyModule_GetState(mod))
94
95/* Find the module instance imported in the currently running sub-interpreter
96 * and get its state.
97 */
98#define ET_STATE_GLOBAL \
99 ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
100
101static int
102elementtree_clear(PyObject *m)
103{
104 elementtreestate *st = ET_STATE(m);
105 Py_CLEAR(st->parseerror_obj);
106 Py_CLEAR(st->deepcopy_obj);
107 Py_CLEAR(st->elementpath_obj);
108 return 0;
109}
110
111static int
112elementtree_traverse(PyObject *m, visitproc visit, void *arg)
113{
114 elementtreestate *st = ET_STATE(m);
115 Py_VISIT(st->parseerror_obj);
116 Py_VISIT(st->deepcopy_obj);
117 Py_VISIT(st->elementpath_obj);
118 return 0;
119}
120
121static void
122elementtree_free(void *m)
123{
124 elementtree_clear((PyObject *)m);
125}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000126
127/* helpers */
128
129LOCAL(PyObject*)
130deepcopy(PyObject* object, PyObject* memo)
131{
132 /* do a deep copy of the given object */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000133 PyObject* args;
134 PyObject* result;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700135 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000136
Eli Bendersky532d03e2013-08-10 08:00:39 -0700137 if (!st->deepcopy_obj) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000138 PyErr_SetString(
139 PyExc_RuntimeError,
140 "deepcopy helper not found"
141 );
142 return NULL;
143 }
144
Antoine Pitrouc1948842012-10-01 23:40:37 +0200145 args = PyTuple_Pack(2, object, memo);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000146 if (!args)
147 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -0700148 result = PyObject_CallObject(st->deepcopy_obj, args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000149 Py_DECREF(args);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000150 return result;
151}
152
153LOCAL(PyObject*)
154list_join(PyObject* list)
155{
156 /* join list elements (destroying the list in the process) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000157 PyObject* joiner;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000158 PyObject* result;
159
Antoine Pitrouc1948842012-10-01 23:40:37 +0200160 joiner = PyUnicode_FromStringAndSize("", 0);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000161 if (!joiner)
162 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200163 result = PyUnicode_Join(joiner, list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000164 Py_DECREF(joiner);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200165 if (result)
166 Py_DECREF(list);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000167 return result;
168}
169
Eli Bendersky48d358b2012-05-30 17:57:50 +0300170/* Is the given object an empty dictionary?
171*/
172static int
173is_empty_dict(PyObject *obj)
174{
175 return PyDict_CheckExact(obj) && PyDict_Size(obj) == 0;
176}
177
178
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000179/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200180/* the Element type */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000181
182typedef struct {
183
184 /* attributes (a dictionary object), or None if no attributes */
185 PyObject* attrib;
186
187 /* child elements */
188 int length; /* actual number of items */
189 int allocated; /* allocated items */
190
191 /* this either points to _children or to a malloced buffer */
192 PyObject* *children;
193
194 PyObject* _children[STATIC_CHILDREN];
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100195
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000196} ElementObjectExtra;
197
198typedef struct {
199 PyObject_HEAD
200
201 /* element tag (a string). */
202 PyObject* tag;
203
204 /* text before first child. note that this is a tagged pointer;
205 use JOIN_OBJ to get the object pointer. the join flag is used
206 to distinguish lists created by the tree builder from lists
207 assigned to the attribute by application code; the former
208 should be joined before being returned to the user, the latter
209 should be left intact. */
210 PyObject* text;
211
212 /* text after this element, in parent. note that this is a tagged
213 pointer; use JOIN_OBJ to get the object pointer. */
214 PyObject* tail;
215
216 ElementObjectExtra* extra;
217
Eli Benderskyebf37a22012-04-03 22:02:37 +0300218 PyObject *weakreflist; /* For tp_weaklistoffset */
219
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000220} ElementObject;
221
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000222
/* True when `op` is exactly of type Element_Type (subclasses excluded). */
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000224
225/* -------------------------------------------------------------------- */
Eli Bendersky092af1f2012-03-04 07:14:03 +0200226/* Element constructors and destructor */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000227
228LOCAL(int)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200229create_extra(ElementObject* self, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000230{
231 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
Victor Stinner81aac732013-07-12 02:03:34 +0200232 if (!self->extra) {
233 PyErr_NoMemory();
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000234 return -1;
Victor Stinner81aac732013-07-12 02:03:34 +0200235 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000236
237 if (!attrib)
238 attrib = Py_None;
239
240 Py_INCREF(attrib);
241 self->extra->attrib = attrib;
242
243 self->extra->length = 0;
244 self->extra->allocated = STATIC_CHILDREN;
245 self->extra->children = self->extra->_children;
246
247 return 0;
248}
249
250LOCAL(void)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200251dealloc_extra(ElementObject* self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000252{
Eli Bendersky08b85292012-04-04 15:55:07 +0300253 ElementObjectExtra *myextra;
254 int i;
255
Eli Benderskyebf37a22012-04-03 22:02:37 +0300256 if (!self->extra)
257 return;
258
259 /* Avoid DECREFs calling into this code again (cycles, etc.)
260 */
Eli Bendersky08b85292012-04-04 15:55:07 +0300261 myextra = self->extra;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300262 self->extra = NULL;
263
264 Py_DECREF(myextra->attrib);
265
Eli Benderskyebf37a22012-04-03 22:02:37 +0300266 for (i = 0; i < myextra->length; i++)
267 Py_DECREF(myextra->children[i]);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000268
Eli Benderskyebf37a22012-04-03 22:02:37 +0300269 if (myextra->children != myextra->_children)
270 PyObject_Free(myextra->children);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000271
Eli Benderskyebf37a22012-04-03 22:02:37 +0300272 PyObject_Free(myextra);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000273}
274
Eli Bendersky092af1f2012-03-04 07:14:03 +0200275/* Convenience internal function to create new Element objects with the given
276 * tag and attributes.
277*/
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000278LOCAL(PyObject*)
Eli Bendersky092af1f2012-03-04 07:14:03 +0200279create_new_element(PyObject* tag, PyObject* attrib)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000280{
281 ElementObject* self;
282
Eli Bendersky0192ba32012-03-30 16:38:33 +0300283 self = PyObject_GC_New(ElementObject, &Element_Type);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000284 if (self == NULL)
285 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000286 self->extra = NULL;
287
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000288 Py_INCREF(tag);
289 self->tag = tag;
290
291 Py_INCREF(Py_None);
292 self->text = Py_None;
293
294 Py_INCREF(Py_None);
295 self->tail = Py_None;
296
Eli Benderskyebf37a22012-04-03 22:02:37 +0300297 self->weakreflist = NULL;
298
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200299 ALLOC(sizeof(ElementObject), "create element");
300 PyObject_GC_Track(self);
301
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200302 if (attrib != Py_None && !is_empty_dict(attrib)) {
303 if (create_extra(self, attrib) < 0) {
Victor Stinnerd917dcb2013-07-12 02:05:17 +0200304 Py_DECREF(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200305 return NULL;
306 }
307 }
308
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000309 return (PyObject*) self;
310}
311
Eli Bendersky092af1f2012-03-04 07:14:03 +0200312static PyObject *
313element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
314{
315 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
316 if (e != NULL) {
317 Py_INCREF(Py_None);
318 e->tag = Py_None;
319
320 Py_INCREF(Py_None);
321 e->text = Py_None;
322
323 Py_INCREF(Py_None);
324 e->tail = Py_None;
325
326 e->extra = NULL;
Eli Benderskyebf37a22012-04-03 22:02:37 +0300327 e->weakreflist = NULL;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200328 }
329 return (PyObject *)e;
330}
331
Eli Bendersky737b1732012-05-29 06:02:56 +0300332/* Helper function for extracting the attrib dictionary from a keywords dict.
333 * This is required by some constructors/functions in this module that can
Eli Bendersky45839902013-01-13 05:14:47 -0800334 * either accept attrib as a keyword argument or all attributes splashed
Eli Bendersky737b1732012-05-29 06:02:56 +0300335 * directly into *kwds.
Eli Benderskyd4cb4b72013-04-22 05:25:25 -0700336 *
337 * Return a dictionary with the content of kwds merged into the content of
338 * attrib. If there is no attrib keyword, return a copy of kwds.
Eli Bendersky737b1732012-05-29 06:02:56 +0300339 */
340static PyObject*
341get_attrib_from_keywords(PyObject *kwds)
342{
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700343 PyObject *attrib_str = PyUnicode_FromString("attrib");
344 PyObject *attrib = PyDict_GetItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300345
346 if (attrib) {
347 /* If attrib was found in kwds, copy its value and remove it from
348 * kwds
349 */
350 if (!PyDict_Check(attrib)) {
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700351 Py_DECREF(attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300352 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
353 Py_TYPE(attrib)->tp_name);
354 return NULL;
355 }
356 attrib = PyDict_Copy(attrib);
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700357 PyDict_DelItem(kwds, attrib_str);
Eli Bendersky737b1732012-05-29 06:02:56 +0300358 } else {
359 attrib = PyDict_New();
360 }
Eli Bendersky45f3d2f2013-04-24 05:34:07 -0700361
362 Py_DECREF(attrib_str);
363
364 /* attrib can be NULL if PyDict_New failed */
365 if (attrib)
Christian Heimes7ed42942013-07-20 15:12:09 +0200366 if (PyDict_Update(attrib, kwds) < 0)
367 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300368 return attrib;
369}
370
Eli Bendersky092af1f2012-03-04 07:14:03 +0200371static int
372element_init(PyObject *self, PyObject *args, PyObject *kwds)
373{
374 PyObject *tag;
375 PyObject *tmp;
376 PyObject *attrib = NULL;
377 ElementObject *self_elem;
378
379 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
380 return -1;
381
Eli Bendersky737b1732012-05-29 06:02:56 +0300382 if (attrib) {
383 /* attrib passed as positional arg */
384 attrib = PyDict_Copy(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200385 if (!attrib)
386 return -1;
Eli Bendersky737b1732012-05-29 06:02:56 +0300387 if (kwds) {
388 if (PyDict_Update(attrib, kwds) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200389 Py_DECREF(attrib);
Eli Bendersky737b1732012-05-29 06:02:56 +0300390 return -1;
391 }
392 }
393 } else if (kwds) {
394 /* have keywords args */
395 attrib = get_attrib_from_keywords(kwds);
396 if (!attrib)
397 return -1;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200398 }
399
400 self_elem = (ElementObject *)self;
401
Antoine Pitrouc1948842012-10-01 23:40:37 +0200402 if (attrib != NULL && !is_empty_dict(attrib)) {
Eli Bendersky092af1f2012-03-04 07:14:03 +0200403 if (create_extra(self_elem, attrib) < 0) {
Antoine Pitrouc1948842012-10-01 23:40:37 +0200404 Py_DECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200405 return -1;
406 }
407 }
408
Eli Bendersky48d358b2012-05-30 17:57:50 +0300409 /* We own a reference to attrib here and it's no longer needed. */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200410 Py_XDECREF(attrib);
Eli Bendersky092af1f2012-03-04 07:14:03 +0200411
412 /* Replace the objects already pointed to by tag, text and tail. */
413 tmp = self_elem->tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200414 Py_INCREF(tag);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200415 self_elem->tag = tag;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200416 Py_DECREF(tmp);
417
418 tmp = self_elem->text;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200419 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200420 self_elem->text = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200421 Py_DECREF(JOIN_OBJ(tmp));
422
423 tmp = self_elem->tail;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200424 Py_INCREF(Py_None);
Antoine Pitrouc1948842012-10-01 23:40:37 +0200425 self_elem->tail = Py_None;
Eli Bendersky092af1f2012-03-04 07:14:03 +0200426 Py_DECREF(JOIN_OBJ(tmp));
427
428 return 0;
429}
430
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000431LOCAL(int)
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200432element_resize(ElementObject* self, Py_ssize_t extra)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000433{
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200434 Py_ssize_t size;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000435 PyObject* *children;
436
437 /* make sure self->children can hold the given number of extra
438 elements. set an exception and return -1 if allocation failed */
439
Victor Stinner5f0af232013-07-11 23:01:36 +0200440 if (!self->extra) {
441 if (create_extra(self, NULL) < 0)
442 return -1;
443 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000444
445 size = self->extra->length + extra;
446
447 if (size > self->extra->allocated) {
448 /* use Python 2.4's list growth strategy */
449 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
Christian Heimes679db4a2008-01-18 09:56:22 +0000450 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100451 * which needs at least 4 bytes.
452 * Although it's a false alarm always assume at least one child to
Christian Heimes679db4a2008-01-18 09:56:22 +0000453 * be safe.
454 */
455 size = size ? size : 1;
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200456 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
457 goto nomemory;
458 if (size > INT_MAX) {
459 PyErr_SetString(PyExc_OverflowError,
460 "too many children");
461 return -1;
462 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000463 if (self->extra->children != self->extra->_children) {
Christian Heimes679db4a2008-01-18 09:56:22 +0000464 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100465 * "children", which needs at least 4 bytes. Although it's a
Christian Heimes679db4a2008-01-18 09:56:22 +0000466 * false alarm always assume at least one child to be safe.
467 */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000468 children = PyObject_Realloc(self->extra->children,
469 size * sizeof(PyObject*));
470 if (!children)
471 goto nomemory;
472 } else {
473 children = PyObject_Malloc(size * sizeof(PyObject*));
474 if (!children)
475 goto nomemory;
476 /* copy existing children from static area to malloc buffer */
477 memcpy(children, self->extra->children,
478 self->extra->length * sizeof(PyObject*));
479 }
480 self->extra->children = children;
481 self->extra->allocated = size;
482 }
483
484 return 0;
485
486 nomemory:
487 PyErr_NoMemory();
488 return -1;
489}
490
491LOCAL(int)
492element_add_subelement(ElementObject* self, PyObject* element)
493{
494 /* add a child element to a parent */
495
496 if (element_resize(self, 1) < 0)
497 return -1;
498
499 Py_INCREF(element);
500 self->extra->children[self->extra->length] = element;
501
502 self->extra->length++;
503
504 return 0;
505}
506
507LOCAL(PyObject*)
508element_get_attrib(ElementObject* self)
509{
510 /* return borrowed reference to attrib dictionary */
511 /* note: this function assumes that the extra section exists */
512
513 PyObject* res = self->extra->attrib;
514
515 if (res == Py_None) {
516 /* create missing dictionary */
517 res = PyDict_New();
518 if (!res)
519 return NULL;
Antoine Pitrouc1948842012-10-01 23:40:37 +0200520 Py_DECREF(Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000521 self->extra->attrib = res;
522 }
523
524 return res;
525}
526
527LOCAL(PyObject*)
528element_get_text(ElementObject* self)
529{
530 /* return borrowed reference to text attribute */
531
532 PyObject* res = self->text;
533
534 if (JOIN_GET(res)) {
535 res = JOIN_OBJ(res);
536 if (PyList_CheckExact(res)) {
537 res = list_join(res);
538 if (!res)
539 return NULL;
540 self->text = res;
541 }
542 }
543
544 return res;
545}
546
547LOCAL(PyObject*)
548element_get_tail(ElementObject* self)
549{
550 /* return borrowed reference to text attribute */
551
552 PyObject* res = self->tail;
553
554 if (JOIN_GET(res)) {
555 res = JOIN_OBJ(res);
556 if (PyList_CheckExact(res)) {
557 res = list_join(res);
558 if (!res)
559 return NULL;
560 self->tail = res;
561 }
562 }
563
564 return res;
565}
566
567static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +0300568subelement(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000569{
570 PyObject* elem;
571
572 ElementObject* parent;
573 PyObject* tag;
574 PyObject* attrib = NULL;
575 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
576 &Element_Type, &parent, &tag,
Eli Bendersky163d7f02013-11-24 06:55:04 -0800577 &PyDict_Type, &attrib)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000578 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -0800579 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000580
Eli Bendersky737b1732012-05-29 06:02:56 +0300581 if (attrib) {
582 /* attrib passed as positional arg */
583 attrib = PyDict_Copy(attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000584 if (!attrib)
585 return NULL;
Eli Bendersky737b1732012-05-29 06:02:56 +0300586 if (kwds) {
587 if (PyDict_Update(attrib, kwds) < 0) {
588 return NULL;
589 }
590 }
591 } else if (kwds) {
592 /* have keyword args */
593 attrib = get_attrib_from_keywords(kwds);
594 if (!attrib)
595 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000596 } else {
Eli Bendersky737b1732012-05-29 06:02:56 +0300597 /* no attrib arg, no kwds, so no attribute */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000598 Py_INCREF(Py_None);
599 attrib = Py_None;
600 }
601
Eli Bendersky092af1f2012-03-04 07:14:03 +0200602 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000603 Py_DECREF(attrib);
Victor Stinner71c8b7e2013-07-11 23:08:39 +0200604 if (elem == NULL)
605 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000606
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000607 if (element_add_subelement(parent, elem) < 0) {
608 Py_DECREF(elem);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000609 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000610 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000611
612 return elem;
613}
614
Eli Bendersky0192ba32012-03-30 16:38:33 +0300615static int
616element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
617{
618 Py_VISIT(self->tag);
619 Py_VISIT(JOIN_OBJ(self->text));
620 Py_VISIT(JOIN_OBJ(self->tail));
621
622 if (self->extra) {
623 int i;
624 Py_VISIT(self->extra->attrib);
625
626 for (i = 0; i < self->extra->length; ++i)
627 Py_VISIT(self->extra->children[i]);
628 }
629 return 0;
630}
631
632static int
633element_gc_clear(ElementObject *self)
634{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300635 Py_CLEAR(self->tag);
Eli Benderskydd3661e2013-09-13 06:24:25 -0700636 _clear_joined_ptr(&self->text);
637 _clear_joined_ptr(&self->tail);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300638
639 /* After dropping all references from extra, it's no longer valid anyway,
Eli Benderskyebf37a22012-04-03 22:02:37 +0300640 * so fully deallocate it.
Eli Bendersky0192ba32012-03-30 16:38:33 +0300641 */
Eli Benderskyebf37a22012-04-03 22:02:37 +0300642 dealloc_extra(self);
Eli Bendersky0192ba32012-03-30 16:38:33 +0300643 return 0;
644}
645
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000646static void
647element_dealloc(ElementObject* self)
648{
Eli Bendersky0192ba32012-03-30 16:38:33 +0300649 PyObject_GC_UnTrack(self);
Eli Benderskyebf37a22012-04-03 22:02:37 +0300650
651 if (self->weakreflist != NULL)
652 PyObject_ClearWeakRefs((PyObject *) self);
653
Eli Bendersky0192ba32012-03-30 16:38:33 +0300654 /* element_gc_clear clears all references and deallocates extra
655 */
656 element_gc_clear(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000657
658 RELEASE(sizeof(ElementObject), "destroy element");
Eli Bendersky092af1f2012-03-04 07:14:03 +0200659 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000660}
661
662/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000663
664static PyObject*
665element_append(ElementObject* self, PyObject* args)
666{
667 PyObject* element;
668 if (!PyArg_ParseTuple(args, "O!:append", &Element_Type, &element))
669 return NULL;
670
671 if (element_add_subelement(self, element) < 0)
672 return NULL;
673
674 Py_RETURN_NONE;
675}
676
677static PyObject*
Eli Bendersky0192ba32012-03-30 16:38:33 +0300678element_clearmethod(ElementObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000679{
680 if (!PyArg_ParseTuple(args, ":clear"))
681 return NULL;
682
Eli Benderskyebf37a22012-04-03 22:02:37 +0300683 dealloc_extra(self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000684
685 Py_INCREF(Py_None);
686 Py_DECREF(JOIN_OBJ(self->text));
687 self->text = Py_None;
688
689 Py_INCREF(Py_None);
690 Py_DECREF(JOIN_OBJ(self->tail));
691 self->tail = Py_None;
692
693 Py_RETURN_NONE;
694}
695
696static PyObject*
697element_copy(ElementObject* self, PyObject* args)
698{
699 int i;
700 ElementObject* element;
701
702 if (!PyArg_ParseTuple(args, ":__copy__"))
703 return NULL;
704
Eli Bendersky092af1f2012-03-04 07:14:03 +0200705 element = (ElementObject*) create_new_element(
Eli Bendersky163d7f02013-11-24 06:55:04 -0800706 self->tag, (self->extra) ? self->extra->attrib : Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000707 if (!element)
708 return NULL;
709
710 Py_DECREF(JOIN_OBJ(element->text));
711 element->text = self->text;
712 Py_INCREF(JOIN_OBJ(element->text));
713
714 Py_DECREF(JOIN_OBJ(element->tail));
715 element->tail = self->tail;
716 Py_INCREF(JOIN_OBJ(element->tail));
717
718 if (self->extra) {
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000719 if (element_resize(element, self->extra->length) < 0) {
720 Py_DECREF(element);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000721 return NULL;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +0000722 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000723
724 for (i = 0; i < self->extra->length; i++) {
725 Py_INCREF(self->extra->children[i]);
726 element->extra->children[i] = self->extra->children[i];
727 }
728
729 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000730 }
731
732 return (PyObject*) element;
733}
734
735static PyObject*
736element_deepcopy(ElementObject* self, PyObject* args)
737{
738 int i;
739 ElementObject* element;
740 PyObject* tag;
741 PyObject* attrib;
742 PyObject* text;
743 PyObject* tail;
744 PyObject* id;
745
746 PyObject* memo;
747 if (!PyArg_ParseTuple(args, "O:__deepcopy__", &memo))
748 return NULL;
749
750 tag = deepcopy(self->tag, memo);
751 if (!tag)
752 return NULL;
753
754 if (self->extra) {
755 attrib = deepcopy(self->extra->attrib, memo);
756 if (!attrib) {
757 Py_DECREF(tag);
758 return NULL;
759 }
760 } else {
761 Py_INCREF(Py_None);
762 attrib = Py_None;
763 }
764
Eli Bendersky092af1f2012-03-04 07:14:03 +0200765 element = (ElementObject*) create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000766
767 Py_DECREF(tag);
768 Py_DECREF(attrib);
769
770 if (!element)
771 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +0100772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000773 text = deepcopy(JOIN_OBJ(self->text), memo);
774 if (!text)
775 goto error;
776 Py_DECREF(element->text);
777 element->text = JOIN_SET(text, JOIN_GET(self->text));
778
779 tail = deepcopy(JOIN_OBJ(self->tail), memo);
780 if (!tail)
781 goto error;
782 Py_DECREF(element->tail);
783 element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
784
785 if (self->extra) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000786 if (element_resize(element, self->extra->length) < 0)
787 goto error;
788
789 for (i = 0; i < self->extra->length; i++) {
790 PyObject* child = deepcopy(self->extra->children[i], memo);
791 if (!child) {
792 element->extra->length = i;
793 goto error;
794 }
795 element->extra->children[i] = child;
796 }
797
798 element->extra->length = self->extra->length;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000799 }
800
801 /* add object to memo dictionary (so deepcopy won't visit it again) */
Antoine Pitrouc1948842012-10-01 23:40:37 +0200802 id = PyLong_FromSsize_t((Py_uintptr_t) self);
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803 if (!id)
804 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +0000805
806 i = PyDict_SetItem(memo, id, (PyObject*) element);
807
808 Py_DECREF(id);
809
810 if (i < 0)
811 goto error;
812
813 return (PyObject*) element;
814
815 error:
816 Py_DECREF(element);
817 return NULL;
818}
819
Martin v. Löwisbce16662012-06-17 10:41:22 +0200820static PyObject*
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200821element_sizeof(PyObject* myself, PyObject* args)
Martin v. Löwisbce16662012-06-17 10:41:22 +0200822{
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200823 ElementObject *self = (ElementObject*)myself;
Martin v. Löwisbce16662012-06-17 10:41:22 +0200824 Py_ssize_t result = sizeof(ElementObject);
825 if (self->extra) {
826 result += sizeof(ElementObjectExtra);
827 if (self->extra->children != self->extra->_children)
828 result += sizeof(PyObject*) * self->extra->allocated;
829 }
830 return PyLong_FromSsize_t(result);
831}
832
/* Keys of the state dictionary exchanged by __getstate__/__setstate__. */
#define PICKLED_TAG      "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB   "attrib"
#define PICKLED_TAIL     "tail"
#define PICKLED_TEXT     "text"
839
840/* __getstate__ returns a fabricated instance dict as in the pure-Python
841 * Element implementation, for interoperability/interchangeability. This
842 * makes the pure-Python implementation details an API, but (a) there aren't
843 * any unnecessary structures there; and (b) it buys compatibility with 3.2
844 * pickles. See issue #16076.
845 */
846static PyObject *
847element_getstate(ElementObject *self)
848{
849 int i, noattrib;
850 PyObject *instancedict = NULL, *children;
851
852 /* Build a list of children. */
853 children = PyList_New(self->extra ? self->extra->length : 0);
854 if (!children)
855 return NULL;
856 for (i = 0; i < PyList_GET_SIZE(children); i++) {
857 PyObject *child = self->extra->children[i];
858 Py_INCREF(child);
859 PyList_SET_ITEM(children, i, child);
860 }
861
862 /* Construct the state object. */
863 noattrib = (self->extra == NULL || self->extra->attrib == Py_None);
864 if (noattrib)
865 instancedict = Py_BuildValue("{sOsOs{}sOsO}",
866 PICKLED_TAG, self->tag,
867 PICKLED_CHILDREN, children,
868 PICKLED_ATTRIB,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700869 PICKLED_TEXT, JOIN_OBJ(self->text),
870 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800871 else
872 instancedict = Py_BuildValue("{sOsOsOsOsO}",
873 PICKLED_TAG, self->tag,
874 PICKLED_CHILDREN, children,
875 PICKLED_ATTRIB, self->extra->attrib,
Eli Benderskydd3661e2013-09-13 06:24:25 -0700876 PICKLED_TEXT, JOIN_OBJ(self->text),
877 PICKLED_TAIL, JOIN_OBJ(self->tail));
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800878 if (instancedict) {
879 Py_DECREF(children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800880 return instancedict;
Eli Benderskyb8f6dc82013-01-12 05:20:16 -0800881 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800882 else {
883 for (i = 0; i < PyList_GET_SIZE(children); i++)
884 Py_DECREF(PyList_GET_ITEM(children, i));
885 Py_DECREF(children);
886
887 return NULL;
888 }
889}
890
891static PyObject *
892element_setstate_from_attributes(ElementObject *self,
893 PyObject *tag,
894 PyObject *attrib,
895 PyObject *text,
896 PyObject *tail,
897 PyObject *children)
898{
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200899 int i, nchildren;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800900
901 if (!tag) {
902 PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
903 return NULL;
904 }
Eli Bendersky698bdb22013-01-10 06:01:06 -0800905
906 Py_CLEAR(self->tag);
907 self->tag = tag;
908 Py_INCREF(self->tag);
909
Eli Benderskydd3661e2013-09-13 06:24:25 -0700910 _clear_joined_ptr(&self->text);
911 self->text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
912 Py_INCREF(JOIN_OBJ(self->text));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800913
Eli Benderskydd3661e2013-09-13 06:24:25 -0700914 _clear_joined_ptr(&self->tail);
915 self->tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
916 Py_INCREF(JOIN_OBJ(self->tail));
Eli Bendersky698bdb22013-01-10 06:01:06 -0800917
918 /* Handle ATTRIB and CHILDREN. */
919 if (!children && !attrib)
920 Py_RETURN_NONE;
921
922 /* Compute 'nchildren'. */
923 if (children) {
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200924 Py_ssize_t size;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800925 if (!PyList_Check(children)) {
926 PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
927 return NULL;
928 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +0200929 size = PyList_Size(children);
930 /* expat limits nchildren to int */
931 if (size > INT_MAX) {
932 PyErr_SetString(PyExc_OverflowError, "too many children");
933 return NULL;
934 }
935 nchildren = (int)size;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800936 }
937 else {
938 nchildren = 0;
939 }
940
941 /* Allocate 'extra'. */
942 if (element_resize(self, nchildren)) {
943 return NULL;
944 }
945 assert(self->extra && self->extra->allocated >= nchildren);
946
947 /* Copy children */
948 for (i = 0; i < nchildren; i++) {
949 self->extra->children[i] = PyList_GET_ITEM(children, i);
950 Py_INCREF(self->extra->children[i]);
951 }
952
953 self->extra->length = nchildren;
954 self->extra->allocated = nchildren;
955
956 /* Stash attrib. */
957 if (attrib) {
958 Py_CLEAR(self->extra->attrib);
959 self->extra->attrib = attrib;
960 Py_INCREF(attrib);
961 }
962
963 Py_RETURN_NONE;
964}
965
966/* __setstate__ for Element instance from the Python implementation.
967 * 'state' should be the instance dict.
968 */
969static PyObject *
970element_setstate_from_Python(ElementObject *self, PyObject *state)
971{
972 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
973 PICKLED_TAIL, PICKLED_CHILDREN, 0};
974 PyObject *args;
975 PyObject *tag, *attrib, *text, *tail, *children;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800976 PyObject *retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800977
Eli Bendersky698bdb22013-01-10 06:01:06 -0800978 tag = attrib = text = tail = children = NULL;
979 args = PyTuple_New(0);
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800980 if (!args)
Eli Bendersky698bdb22013-01-10 06:01:06 -0800981 return NULL;
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800982
983 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
984 &attrib, &text, &tail, &children))
985 retval = element_setstate_from_attributes(self, tag, attrib, text,
986 tail, children);
Eli Bendersky698bdb22013-01-10 06:01:06 -0800987 else
Eli Bendersky799e3ed2013-01-12 05:42:38 -0800988 retval = NULL;
989
990 Py_DECREF(args);
991 return retval;
Eli Bendersky698bdb22013-01-10 06:01:06 -0800992}
993
994static PyObject *
995element_setstate(ElementObject *self, PyObject *state)
996{
997 if (!PyDict_CheckExact(state)) {
998 PyErr_Format(PyExc_TypeError,
999 "Don't know how to unpickle \"%.200R\" as an Element",
1000 state);
1001 return NULL;
1002 }
1003 else
1004 return element_setstate_from_Python(self, state);
1005}
1006
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001007LOCAL(int)
1008checkpath(PyObject* tag)
1009{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001010 Py_ssize_t i;
1011 int check = 1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001012
1013 /* check if a tag contains an xpath character */
1014
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001015#define PATHCHAR(ch) \
1016 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001017
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001018 if (PyUnicode_Check(tag)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001019 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1020 void *data = PyUnicode_DATA(tag);
1021 unsigned int kind = PyUnicode_KIND(tag);
1022 for (i = 0; i < len; i++) {
1023 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1024 if (ch == '{')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001025 check = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001026 else if (ch == '}')
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001027 check = 1;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001028 else if (check && PATHCHAR(ch))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001029 return 1;
1030 }
1031 return 0;
1032 }
Christian Heimes72b710a2008-05-26 13:28:38 +00001033 if (PyBytes_Check(tag)) {
1034 char *p = PyBytes_AS_STRING(tag);
1035 for (i = 0; i < PyBytes_GET_SIZE(tag); i++) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001036 if (p[i] == '{')
1037 check = 0;
1038 else if (p[i] == '}')
1039 check = 1;
1040 else if (check && PATHCHAR(p[i]))
1041 return 1;
1042 }
1043 return 0;
1044 }
1045
1046 return 1; /* unknown type; might be path expression */
1047}
1048
1049static PyObject*
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001050element_extend(ElementObject* self, PyObject* args)
1051{
1052 PyObject* seq;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001053 Py_ssize_t i;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001054
1055 PyObject* seq_in;
1056 if (!PyArg_ParseTuple(args, "O:extend", &seq_in))
1057 return NULL;
1058
1059 seq = PySequence_Fast(seq_in, "");
1060 if (!seq) {
1061 PyErr_Format(
1062 PyExc_TypeError,
1063 "expected sequence, not \"%.200s\"", Py_TYPE(seq_in)->tp_name
1064 );
1065 return NULL;
1066 }
1067
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001068 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001069 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001070 Py_INCREF(element);
1071 if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) {
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001072 PyErr_Format(
1073 PyExc_TypeError,
1074 "expected an Element, not \"%.200s\"",
1075 Py_TYPE(element)->tp_name);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001076 Py_DECREF(seq);
1077 Py_DECREF(element);
Eli Bendersky396e8fc2012-03-23 14:24:20 +02001078 return NULL;
1079 }
1080
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001081 if (element_add_subelement(self, element) < 0) {
1082 Py_DECREF(seq);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001083 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001084 return NULL;
1085 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001086 Py_DECREF(element);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001087 }
1088
1089 Py_DECREF(seq);
1090
1091 Py_RETURN_NONE;
1092}
1093
1094static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001095element_find(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001096{
1097 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001098 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001099 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001100 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001101 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001102
Eli Bendersky737b1732012-05-29 06:02:56 +03001103 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist,
1104 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001105 return NULL;
1106
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001107 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001108 _Py_IDENTIFIER(find);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001109 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001110 st->elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001111 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001112 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001113
1114 if (!self->extra)
1115 Py_RETURN_NONE;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001116
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001117 for (i = 0; i < self->extra->length; i++) {
1118 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001119 int rc;
1120 if (!Element_CheckExact(item))
1121 continue;
1122 Py_INCREF(item);
1123 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1124 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001125 return item;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001126 Py_DECREF(item);
1127 if (rc < 0)
1128 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001129 }
1130
1131 Py_RETURN_NONE;
1132}
1133
1134static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001135element_findtext(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001136{
1137 int i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001138 PyObject* tag;
1139 PyObject* default_value = Py_None;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001140 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001141 _Py_IDENTIFIER(findtext);
Eli Bendersky737b1732012-05-29 06:02:56 +03001142 static char *kwlist[] = {"path", "default", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001143 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001144
Eli Bendersky737b1732012-05-29 06:02:56 +03001145 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist,
1146 &tag, &default_value, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001147 return NULL;
1148
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001149 if (checkpath(tag) || namespaces != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001150 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001151 st->elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001152 );
1153
1154 if (!self->extra) {
1155 Py_INCREF(default_value);
1156 return default_value;
1157 }
1158
1159 for (i = 0; i < self->extra->length; i++) {
1160 ElementObject* item = (ElementObject*) self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001161 int rc;
1162 if (!Element_CheckExact(item))
1163 continue;
1164 Py_INCREF(item);
1165 rc = PyObject_RichCompareBool(item->tag, tag, Py_EQ);
1166 if (rc > 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001167 PyObject* text = element_get_text(item);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001168 if (text == Py_None) {
1169 Py_DECREF(item);
Eli Bendersky25771b32013-01-13 05:26:07 -08001170 return PyUnicode_New(0, 0);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001171 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001172 Py_XINCREF(text);
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001173 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001174 return text;
1175 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001176 Py_DECREF(item);
1177 if (rc < 0)
1178 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001179 }
1180
1181 Py_INCREF(default_value);
1182 return default_value;
1183}
1184
1185static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001186element_findall(ElementObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001187{
1188 int i;
1189 PyObject* out;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001190 PyObject* tag;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001191 PyObject* namespaces = Py_None;
Eli Bendersky737b1732012-05-29 06:02:56 +03001192 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001193 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001194
Eli Bendersky737b1732012-05-29 06:02:56 +03001195 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist,
1196 &tag, &namespaces))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001197 return NULL;
1198
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001199 if (checkpath(tag) || namespaces != Py_None) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001200 _Py_IDENTIFIER(findall);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001201 return _PyObject_CallMethodId(
Eli Bendersky532d03e2013-08-10 08:00:39 -07001202 st->elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001203 );
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001204 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001205
1206 out = PyList_New(0);
1207 if (!out)
1208 return NULL;
1209
1210 if (!self->extra)
1211 return out;
1212
1213 for (i = 0; i < self->extra->length; i++) {
1214 PyObject* item = self->extra->children[i];
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001215 int rc;
1216 if (!Element_CheckExact(item))
1217 continue;
1218 Py_INCREF(item);
1219 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ);
1220 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1221 Py_DECREF(item);
1222 Py_DECREF(out);
1223 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001224 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001225 Py_DECREF(item);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001226 }
1227
1228 return out;
1229}
1230
1231static PyObject*
Eli Bendersky737b1732012-05-29 06:02:56 +03001232element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001233{
1234 PyObject* tag;
1235 PyObject* namespaces = Py_None;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001236 _Py_IDENTIFIER(iterfind);
Eli Bendersky737b1732012-05-29 06:02:56 +03001237 static char *kwlist[] = {"path", "namespaces", 0};
Eli Bendersky532d03e2013-08-10 08:00:39 -07001238 elementtreestate *st = ET_STATE_GLOBAL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001239
Eli Bendersky737b1732012-05-29 06:02:56 +03001240 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist,
Eli Bendersky163d7f02013-11-24 06:55:04 -08001241 &tag, &namespaces)) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001242 return NULL;
Eli Bendersky163d7f02013-11-24 06:55:04 -08001243 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001244
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001245 return _PyObject_CallMethodId(
Eli Bendersky163d7f02013-11-24 06:55:04 -08001246 st->elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001247}
1248
1249static PyObject*
Eli Benderskya8736902013-01-05 06:26:39 -08001250element_get(ElementObject* self, PyObject* args, PyObject* kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001251{
1252 PyObject* value;
Eli Benderskya8736902013-01-05 06:26:39 -08001253 static char* kwlist[] = {"key", "default", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001254
1255 PyObject* key;
1256 PyObject* default_value = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001257
1258 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:get", kwlist, &key,
1259 &default_value))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001260 return NULL;
1261
1262 if (!self->extra || self->extra->attrib == Py_None)
1263 value = default_value;
1264 else {
1265 value = PyDict_GetItem(self->extra->attrib, key);
1266 if (!value)
1267 value = default_value;
1268 }
1269
1270 Py_INCREF(value);
1271 return value;
1272}
1273
1274static PyObject*
1275element_getchildren(ElementObject* self, PyObject* args)
1276{
1277 int i;
1278 PyObject* list;
1279
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001280 /* FIXME: report as deprecated? */
1281
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001282 if (!PyArg_ParseTuple(args, ":getchildren"))
1283 return NULL;
1284
1285 if (!self->extra)
1286 return PyList_New(0);
1287
1288 list = PyList_New(self->extra->length);
1289 if (!list)
1290 return NULL;
1291
1292 for (i = 0; i < self->extra->length; i++) {
1293 PyObject* item = self->extra->children[i];
1294 Py_INCREF(item);
1295 PyList_SET_ITEM(list, i, item);
1296 }
1297
1298 return list;
1299}
1300
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001301
Eli Bendersky64d11e62012-06-15 07:42:50 +03001302static PyObject *
1303create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1304
1305
1306static PyObject *
Eli Benderskya8736902013-01-05 06:26:39 -08001307element_iter(ElementObject *self, PyObject *args, PyObject *kwds)
Eli Bendersky64d11e62012-06-15 07:42:50 +03001308{
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001309 PyObject* tag = Py_None;
Eli Benderskya8736902013-01-05 06:26:39 -08001310 static char* kwlist[] = {"tag", 0};
1311
1312 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:iter", kwlist, &tag))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001313 return NULL;
1314
Eli Bendersky64d11e62012-06-15 07:42:50 +03001315 return create_elementiter(self, tag, 0);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001316}
1317
1318
1319static PyObject*
1320element_itertext(ElementObject* self, PyObject* args)
1321{
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001322 if (!PyArg_ParseTuple(args, ":itertext"))
1323 return NULL;
1324
Eli Bendersky64d11e62012-06-15 07:42:50 +03001325 return create_elementiter(self, Py_None, 1);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001326}
1327
Eli Bendersky64d11e62012-06-15 07:42:50 +03001328
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001329static PyObject*
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001330element_getitem(PyObject* self_, Py_ssize_t index)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001331{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001332 ElementObject* self = (ElementObject*) self_;
1333
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001334 if (!self->extra || index < 0 || index >= self->extra->length) {
1335 PyErr_SetString(
1336 PyExc_IndexError,
1337 "child index out of range"
1338 );
1339 return NULL;
1340 }
1341
1342 Py_INCREF(self->extra->children[index]);
1343 return self->extra->children[index];
1344}
1345
1346static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001347element_insert(ElementObject* self, PyObject* args)
1348{
1349 int i;
1350
1351 int index;
1352 PyObject* element;
1353 if (!PyArg_ParseTuple(args, "iO!:insert", &index,
1354 &Element_Type, &element))
1355 return NULL;
1356
Victor Stinner5f0af232013-07-11 23:01:36 +02001357 if (!self->extra) {
1358 if (create_extra(self, NULL) < 0)
1359 return NULL;
1360 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001361
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001362 if (index < 0) {
1363 index += self->extra->length;
1364 if (index < 0)
1365 index = 0;
1366 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001367 if (index > self->extra->length)
1368 index = self->extra->length;
1369
1370 if (element_resize(self, 1) < 0)
1371 return NULL;
1372
1373 for (i = self->extra->length; i > index; i--)
1374 self->extra->children[i] = self->extra->children[i-1];
1375
1376 Py_INCREF(element);
1377 self->extra->children[index] = element;
1378
1379 self->extra->length++;
1380
1381 Py_RETURN_NONE;
1382}
1383
1384static PyObject*
1385element_items(ElementObject* self, PyObject* args)
1386{
1387 if (!PyArg_ParseTuple(args, ":items"))
1388 return NULL;
1389
1390 if (!self->extra || self->extra->attrib == Py_None)
1391 return PyList_New(0);
1392
1393 return PyDict_Items(self->extra->attrib);
1394}
1395
1396static PyObject*
1397element_keys(ElementObject* self, PyObject* args)
1398{
1399 if (!PyArg_ParseTuple(args, ":keys"))
1400 return NULL;
1401
1402 if (!self->extra || self->extra->attrib == Py_None)
1403 return PyList_New(0);
1404
1405 return PyDict_Keys(self->extra->attrib);
1406}
1407
Martin v. Löwis18e16552006-02-15 17:27:45 +00001408static Py_ssize_t
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001409element_length(ElementObject* self)
1410{
1411 if (!self->extra)
1412 return 0;
1413
1414 return self->extra->length;
1415}
1416
1417static PyObject*
1418element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
1419{
1420 PyObject* elem;
1421
1422 PyObject* tag;
1423 PyObject* attrib;
1424 if (!PyArg_ParseTuple(args, "OO:makeelement", &tag, &attrib))
1425 return NULL;
1426
1427 attrib = PyDict_Copy(attrib);
1428 if (!attrib)
1429 return NULL;
1430
Eli Bendersky092af1f2012-03-04 07:14:03 +02001431 elem = create_new_element(tag, attrib);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001432
1433 Py_DECREF(attrib);
1434
1435 return elem;
1436}
1437
1438static PyObject*
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001439element_remove(ElementObject* self, PyObject* args)
1440{
1441 int i;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001442 int rc;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001443 PyObject* element;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001444 PyObject* found;
1445
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001446 if (!PyArg_ParseTuple(args, "O!:remove", &Element_Type, &element))
1447 return NULL;
1448
1449 if (!self->extra) {
1450 /* element has no children, so raise exception */
1451 PyErr_SetString(
1452 PyExc_ValueError,
1453 "list.remove(x): x not in list"
1454 );
1455 return NULL;
1456 }
1457
1458 for (i = 0; i < self->extra->length; i++) {
1459 if (self->extra->children[i] == element)
1460 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001461 rc = PyObject_RichCompareBool(self->extra->children[i], element, Py_EQ);
1462 if (rc > 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001463 break;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001464 if (rc < 0)
1465 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001466 }
1467
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001468 if (i >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001469 /* element is not in children, so raise exception */
1470 PyErr_SetString(
1471 PyExc_ValueError,
1472 "list.remove(x): x not in list"
1473 );
1474 return NULL;
1475 }
1476
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001477 found = self->extra->children[i];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001478
1479 self->extra->length--;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001480 for (; i < self->extra->length; i++)
1481 self->extra->children[i] = self->extra->children[i+1];
1482
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03001483 Py_DECREF(found);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001484 Py_RETURN_NONE;
1485}
1486
1487static PyObject*
1488element_repr(ElementObject* self)
1489{
Eli Bendersky092af1f2012-03-04 07:14:03 +02001490 if (self->tag)
1491 return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1492 else
1493 return PyUnicode_FromFormat("<Element at %p>", self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001494}
1495
1496static PyObject*
1497element_set(ElementObject* self, PyObject* args)
1498{
1499 PyObject* attrib;
1500
1501 PyObject* key;
1502 PyObject* value;
1503 if (!PyArg_ParseTuple(args, "OO:set", &key, &value))
1504 return NULL;
1505
Victor Stinner5f0af232013-07-11 23:01:36 +02001506 if (!self->extra) {
1507 if (create_extra(self, NULL) < 0)
1508 return NULL;
1509 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001510
1511 attrib = element_get_attrib(self);
1512 if (!attrib)
1513 return NULL;
1514
1515 if (PyDict_SetItem(attrib, key, value) < 0)
1516 return NULL;
1517
1518 Py_RETURN_NONE;
1519}
1520
1521static int
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001522element_setitem(PyObject* self_, Py_ssize_t index_, PyObject* item)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001523{
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00001524 ElementObject* self = (ElementObject*) self_;
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001525 int i, index;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001526 PyObject* old;
1527
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001528 if (!self->extra || index_ < 0 || index_ >= self->extra->length) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001529 PyErr_SetString(
1530 PyExc_IndexError,
1531 "child assignment index out of range");
1532 return -1;
1533 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001534 index = (int)index_;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001535
1536 old = self->extra->children[index];
1537
1538 if (item) {
1539 Py_INCREF(item);
1540 self->extra->children[index] = item;
1541 } else {
1542 self->extra->length--;
1543 for (i = index; i < self->extra->length; i++)
1544 self->extra->children[i] = self->extra->children[i+1];
1545 }
1546
1547 Py_DECREF(old);
1548
1549 return 0;
1550}
1551
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001552static PyObject*
1553element_subscr(PyObject* self_, PyObject* item)
1554{
1555 ElementObject* self = (ElementObject*) self_;
1556
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001557 if (PyIndex_Check(item)) {
1558 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001559
1560 if (i == -1 && PyErr_Occurred()) {
1561 return NULL;
1562 }
1563 if (i < 0 && self->extra)
1564 i += self->extra->length;
1565 return element_getitem(self_, i);
1566 }
1567 else if (PySlice_Check(item)) {
1568 Py_ssize_t start, stop, step, slicelen, cur, i;
1569 PyObject* list;
1570
1571 if (!self->extra)
1572 return PyList_New(0);
1573
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001574 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001575 self->extra->length,
1576 &start, &stop, &step, &slicelen) < 0) {
1577 return NULL;
1578 }
1579
1580 if (slicelen <= 0)
1581 return PyList_New(0);
1582 else {
1583 list = PyList_New(slicelen);
1584 if (!list)
1585 return NULL;
1586
1587 for (cur = start, i = 0; i < slicelen;
1588 cur += step, i++) {
1589 PyObject* item = self->extra->children[cur];
1590 Py_INCREF(item);
1591 PyList_SET_ITEM(list, i, item);
1592 }
1593
1594 return list;
1595 }
1596 }
1597 else {
1598 PyErr_SetString(PyExc_TypeError,
1599 "element indices must be integers");
1600 return NULL;
1601 }
1602}
1603
1604static int
1605element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1606{
1607 ElementObject* self = (ElementObject*) self_;
1608
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001609 if (PyIndex_Check(item)) {
1610 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001611
1612 if (i == -1 && PyErr_Occurred()) {
1613 return -1;
1614 }
1615 if (i < 0 && self->extra)
1616 i += self->extra->length;
1617 return element_setitem(self_, i, value);
1618 }
1619 else if (PySlice_Check(item)) {
1620 Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
1621
1622 PyObject* recycle = NULL;
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001623 PyObject* seq;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001624
Victor Stinner5f0af232013-07-11 23:01:36 +02001625 if (!self->extra) {
1626 if (create_extra(self, NULL) < 0)
1627 return -1;
1628 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001629
Martin v. Löwis4d0d4712010-12-03 20:14:31 +00001630 if (PySlice_GetIndicesEx(item,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001631 self->extra->length,
1632 &start, &stop, &step, &slicelen) < 0) {
1633 return -1;
1634 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001635 assert(slicelen <= self->extra->length);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001636
Eli Bendersky865756a2012-03-09 13:38:15 +02001637 if (value == NULL) {
1638 /* Delete slice */
1639 size_t cur;
1640 Py_ssize_t i;
1641
1642 if (slicelen <= 0)
1643 return 0;
1644
1645 /* Since we're deleting, the direction of the range doesn't matter,
1646 * so for simplicity make it always ascending.
1647 */
1648 if (step < 0) {
1649 stop = start + 1;
1650 start = stop + step * (slicelen - 1) - 1;
1651 step = -step;
1652 }
1653
1654 assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
1655
1656 /* recycle is a list that will contain all the children
1657 * scheduled for removal.
1658 */
1659 if (!(recycle = PyList_New(slicelen))) {
1660 PyErr_NoMemory();
1661 return -1;
1662 }
1663
1664 /* This loop walks over all the children that have to be deleted,
1665 * with cur pointing at them. num_moved is the amount of children
1666 * until the next deleted child that have to be "shifted down" to
1667 * occupy the deleted's places.
1668 * Note that in the ith iteration, shifting is done i+i places down
1669 * because i children were already removed.
1670 */
1671 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1672 /* Compute how many children have to be moved, clipping at the
1673 * list end.
1674 */
1675 Py_ssize_t num_moved = step - 1;
1676 if (cur + step >= (size_t)self->extra->length) {
1677 num_moved = self->extra->length - cur - 1;
1678 }
1679
1680 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1681
1682 memmove(
1683 self->extra->children + cur - i,
1684 self->extra->children + cur + 1,
1685 num_moved * sizeof(PyObject *));
1686 }
1687
1688 /* Leftover "tail" after the last removed child */
1689 cur = start + (size_t)slicelen * step;
1690 if (cur < (size_t)self->extra->length) {
1691 memmove(
1692 self->extra->children + cur - slicelen,
1693 self->extra->children + cur,
1694 (self->extra->length - cur) * sizeof(PyObject *));
1695 }
1696
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001697 self->extra->length -= (int)slicelen;
Eli Bendersky865756a2012-03-09 13:38:15 +02001698
1699 /* Discard the recycle list with all the deleted sub-elements */
1700 Py_XDECREF(recycle);
1701 return 0;
1702 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001703
1704 /* A new slice is actually being assigned */
1705 seq = PySequence_Fast(value, "");
1706 if (!seq) {
1707 PyErr_Format(
1708 PyExc_TypeError,
1709 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1710 );
1711 return -1;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001712 }
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001713 newlen = PySequence_Size(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001714
1715 if (step != 1 && newlen != slicelen)
1716 {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001717 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001718 PyErr_Format(PyExc_ValueError,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001719 "attempt to assign sequence of size %zd "
1720 "to extended slice of size %zd",
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001721 newlen, slicelen
1722 );
1723 return -1;
1724 }
1725
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001726 /* Resize before creating the recycle bin, to prevent refleaks. */
1727 if (newlen > slicelen) {
1728 if (element_resize(self, newlen - slicelen) < 0) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001729 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001730 return -1;
1731 }
1732 }
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001733 assert(newlen - slicelen <= INT_MAX - self->extra->length);
1734 assert(newlen - slicelen >= -self->extra->length);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001735
1736 if (slicelen > 0) {
1737 /* to avoid recursive calls to this method (via decref), move
1738 old items to the recycle bin here, and get rid of them when
1739 we're done modifying the element */
1740 recycle = PyList_New(slicelen);
1741 if (!recycle) {
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001742 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001743 return -1;
1744 }
1745 for (cur = start, i = 0; i < slicelen;
1746 cur += step, i++)
1747 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1748 }
1749
1750 if (newlen < slicelen) {
1751 /* delete slice */
1752 for (i = stop; i < self->extra->length; i++)
1753 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1754 } else if (newlen > slicelen) {
1755 /* insert slice */
1756 for (i = self->extra->length-1; i >= stop; i--)
1757 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1758 }
1759
1760 /* replace the slice */
1761 for (cur = start, i = 0; i < newlen;
1762 cur += step, i++) {
1763 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1764 Py_INCREF(element);
1765 self->extra->children[cur] = element;
1766 }
1767
Serhiy Storchaka097a6642015-11-25 20:12:37 +02001768 self->extra->length += (int)(newlen - slicelen);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001769
Serhiy Storchaka04d759b2015-11-22 12:18:38 +02001770 Py_DECREF(seq);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001771
1772 /* discard the recycle bin, and everything in it */
1773 Py_XDECREF(recycle);
1774
1775 return 0;
1776 }
1777 else {
1778 PyErr_SetString(PyExc_TypeError,
1779 "element indices must be integers");
1780 return -1;
1781 }
1782}
1783
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001784static PyMethodDef element_methods[] = {
1785
Eli Bendersky0192ba32012-03-30 16:38:33 +03001786 {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001787
Eli Benderskya8736902013-01-05 06:26:39 -08001788 {"get", (PyCFunction) element_get, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001789 {"set", (PyCFunction) element_set, METH_VARARGS},
1790
Eli Bendersky737b1732012-05-29 06:02:56 +03001791 {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS},
1792 {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS},
1793 {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001794
1795 {"append", (PyCFunction) element_append, METH_VARARGS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001796 {"extend", (PyCFunction) element_extend, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001797 {"insert", (PyCFunction) element_insert, METH_VARARGS},
1798 {"remove", (PyCFunction) element_remove, METH_VARARGS},
1799
Eli Benderskya8736902013-01-05 06:26:39 -08001800 {"iter", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001801 {"itertext", (PyCFunction) element_itertext, METH_VARARGS},
Eli Bendersky737b1732012-05-29 06:02:56 +03001802 {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS},
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001803
Eli Benderskya8736902013-01-05 06:26:39 -08001804 {"getiterator", (PyCFunction) element_iter, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001805 {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},
1806
1807 {"items", (PyCFunction) element_items, METH_VARARGS},
1808 {"keys", (PyCFunction) element_keys, METH_VARARGS},
1809
1810 {"makeelement", (PyCFunction) element_makeelement, METH_VARARGS},
1811
1812 {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
1813 {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
Martin v. Löwisbce16662012-06-17 10:41:22 +02001814 {"__sizeof__", element_sizeof, METH_NOARGS},
Eli Bendersky698bdb22013-01-10 06:01:06 -08001815 {"__getstate__", (PyCFunction)element_getstate, METH_NOARGS},
1816 {"__setstate__", (PyCFunction)element_setstate, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001817
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001818 {NULL, NULL}
1819};
1820
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001821static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001822element_getattro(ElementObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001823{
1824 PyObject* res;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001825 char *name = "";
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001826
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001827 if (PyUnicode_Check(nameobj))
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001828 name = _PyUnicode_AsString(nameobj);
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01001829
Alexander Belopolskye239d232010-12-08 23:31:48 +00001830 if (name == NULL)
1831 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001832
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001833 /* handle common attributes first */
1834 if (strcmp(name, "tag") == 0) {
1835 res = self->tag;
1836 Py_INCREF(res);
1837 return res;
1838 } else if (strcmp(name, "text") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001839 res = element_get_text(self);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02001840 Py_XINCREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001841 return res;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001842 }
1843
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001844 /* methods */
1845 res = PyObject_GenericGetAttr((PyObject*) self, nameobj);
1846 if (res)
1847 return res;
1848
1849 /* less common attributes */
1850 if (strcmp(name, "tail") == 0) {
1851 PyErr_Clear();
1852 res = element_get_tail(self);
1853 } else if (strcmp(name, "attrib") == 0) {
1854 PyErr_Clear();
Victor Stinner5f0af232013-07-11 23:01:36 +02001855 if (!self->extra) {
1856 if (create_extra(self, NULL) < 0)
1857 return NULL;
1858 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001859 res = element_get_attrib(self);
1860 }
1861
1862 if (!res)
1863 return NULL;
1864
1865 Py_INCREF(res);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001866 return res;
1867}
1868
Eli Benderskyef9683b2013-05-18 07:52:34 -07001869static int
Eli Benderskyb20df952012-05-20 06:33:29 +03001870element_setattro(ElementObject* self, PyObject* nameobj, PyObject* value)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001871{
Eli Benderskyb20df952012-05-20 06:33:29 +03001872 char *name = "";
Serhiy Storchakab6aa5372015-11-23 08:42:25 +02001873
1874 if (value == NULL) {
1875 PyErr_SetString(PyExc_AttributeError,
1876 "can't delete attribute");
1877 return -1;
1878 }
Eli Benderskyb20df952012-05-20 06:33:29 +03001879 if (PyUnicode_Check(nameobj))
1880 name = _PyUnicode_AsString(nameobj);
Victor Stinner4d463432013-07-11 23:05:03 +02001881 if (name == NULL)
Eli Benderskyef9683b2013-05-18 07:52:34 -07001882 return -1;
Victor Stinner4d463432013-07-11 23:05:03 +02001883
1884 if (strcmp(name, "tag") == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001885 Py_DECREF(self->tag);
1886 self->tag = value;
1887 Py_INCREF(self->tag);
1888 } else if (strcmp(name, "text") == 0) {
1889 Py_DECREF(JOIN_OBJ(self->text));
1890 self->text = value;
1891 Py_INCREF(self->text);
1892 } else if (strcmp(name, "tail") == 0) {
1893 Py_DECREF(JOIN_OBJ(self->tail));
1894 self->tail = value;
1895 Py_INCREF(self->tail);
1896 } else if (strcmp(name, "attrib") == 0) {
Victor Stinner5f0af232013-07-11 23:01:36 +02001897 if (!self->extra) {
1898 if (create_extra(self, NULL) < 0)
1899 return -1;
1900 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001901 Py_DECREF(self->extra->attrib);
1902 self->extra->attrib = value;
1903 Py_INCREF(self->extra->attrib);
1904 } else {
Eli Benderskyef9683b2013-05-18 07:52:34 -07001905 PyErr_SetString(PyExc_AttributeError,
Eli Bendersky6a55dc32013-05-19 16:59:59 -07001906 "Can't set arbitrary attributes on Element");
Eli Benderskyef9683b2013-05-18 07:52:34 -07001907 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001908 }
1909
Eli Benderskyef9683b2013-05-18 07:52:34 -07001910 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001911}
1912
1913static PySequenceMethods element_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001914 (lenfunc) element_length,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001915 0, /* sq_concat */
1916 0, /* sq_repeat */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001917 element_getitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001918 0,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001919 element_setitem,
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001920 0,
1921};
1922
1923static PyMappingMethods element_as_mapping = {
1924 (lenfunc) element_length,
1925 (binaryfunc) element_subscr,
1926 (objobjargproc) element_ass_subscr,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001927};
1928
Neal Norwitz227b5332006-03-22 09:28:35 +00001929static PyTypeObject Element_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001930 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08001931 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001932 /* methods */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001933 (destructor)element_dealloc, /* tp_dealloc */
1934 0, /* tp_print */
1935 0, /* tp_getattr */
Eli Benderskyb20df952012-05-20 06:33:29 +03001936 0, /* tp_setattr */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001937 0, /* tp_reserved */
1938 (reprfunc)element_repr, /* tp_repr */
1939 0, /* tp_as_number */
1940 &element_as_sequence, /* tp_as_sequence */
1941 &element_as_mapping, /* tp_as_mapping */
1942 0, /* tp_hash */
1943 0, /* tp_call */
1944 0, /* tp_str */
1945 (getattrofunc)element_getattro, /* tp_getattro */
Eli Benderskyb20df952012-05-20 06:33:29 +03001946 (setattrofunc)element_setattro, /* tp_setattro */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001947 0, /* tp_as_buffer */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001948 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1949 /* tp_flags */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001950 0, /* tp_doc */
Eli Bendersky0192ba32012-03-30 16:38:33 +03001951 (traverseproc)element_gc_traverse, /* tp_traverse */
1952 (inquiry)element_gc_clear, /* tp_clear */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001953 0, /* tp_richcompare */
Eli Benderskyebf37a22012-04-03 22:02:37 +03001954 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
Eli Bendersky092af1f2012-03-04 07:14:03 +02001955 0, /* tp_iter */
1956 0, /* tp_iternext */
1957 element_methods, /* tp_methods */
1958 0, /* tp_members */
1959 0, /* tp_getset */
1960 0, /* tp_base */
1961 0, /* tp_dict */
1962 0, /* tp_descr_get */
1963 0, /* tp_descr_set */
1964 0, /* tp_dictoffset */
1965 (initproc)element_init, /* tp_init */
1966 PyType_GenericAlloc, /* tp_alloc */
1967 element_new, /* tp_new */
1968 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00001969};
1970
Eli Bendersky64d11e62012-06-15 07:42:50 +03001971/******************************* Element iterator ****************************/
1972
1973/* ElementIterObject represents the iteration state over an XML element in
1974 * pre-order traversal. To keep track of which sub-element should be returned
1975 * next, a stack of parents is maintained. This is a standard stack-based
1976 * iterative pre-order traversal of a tree.
1977 * The stack is managed using a single-linked list starting at parent_stack.
1978 * Each stack node contains the saved parent to which we should return after
1979 * the current one is exhausted, and the next child to examine in that parent.
1980 */
1981typedef struct ParentLocator_t {
1982 ElementObject *parent;
1983 Py_ssize_t child_index;
1984 struct ParentLocator_t *next;
1985} ParentLocator;
1986
1987typedef struct {
1988 PyObject_HEAD
1989 ParentLocator *parent_stack;
1990 ElementObject *root_element;
1991 PyObject *sought_tag;
1992 int root_done;
1993 int gettext;
1994} ElementIterObject;
1995
1996
1997static void
1998elementiter_dealloc(ElementIterObject *it)
1999{
2000 ParentLocator *p = it->parent_stack;
2001 while (p) {
2002 ParentLocator *temp = p;
2003 Py_XDECREF(p->parent);
2004 p = p->next;
2005 PyObject_Free(temp);
2006 }
2007
2008 Py_XDECREF(it->sought_tag);
2009 Py_XDECREF(it->root_element);
2010
2011 PyObject_GC_UnTrack(it);
2012 PyObject_GC_Del(it);
2013}
2014
2015static int
2016elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2017{
2018 ParentLocator *p = it->parent_stack;
2019 while (p) {
2020 Py_VISIT(p->parent);
2021 p = p->next;
2022 }
2023
2024 Py_VISIT(it->root_element);
2025 Py_VISIT(it->sought_tag);
2026 return 0;
2027}
2028
2029/* Helper function for elementiter_next. Add a new parent to the parent stack.
2030 */
2031static ParentLocator *
2032parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
2033{
2034 ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
2035 if (new_node) {
2036 new_node->parent = parent;
2037 Py_INCREF(parent);
2038 new_node->child_index = 0;
2039 new_node->next = stack;
2040 }
2041 return new_node;
2042}
2043
2044static PyObject *
2045elementiter_next(ElementIterObject *it)
2046{
2047 /* Sub-element iterator.
Eli Bendersky45839902013-01-13 05:14:47 -08002048 *
Eli Bendersky64d11e62012-06-15 07:42:50 +03002049 * A short note on gettext: this function serves both the iter() and
2050 * itertext() methods to avoid code duplication. However, there are a few
2051 * small differences in the way these iterations work. Namely:
2052 * - itertext() only yields text from nodes that have it, and continues
2053 * iterating when a node doesn't have text (so it doesn't return any
2054 * node like iter())
2055 * - itertext() also has to handle tail, after finishing with all the
2056 * children of a node.
2057 */
Eli Bendersky113da642012-06-15 07:52:49 +03002058 ElementObject *cur_parent;
2059 Py_ssize_t child_index;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002060 int rc;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002061
2062 while (1) {
2063 /* Handle the case reached in the beginning and end of iteration, where
2064 * the parent stack is empty. The root_done flag gives us indication
2065 * whether we've just started iterating (so root_done is 0), in which
2066 * case the root is returned. If root_done is 1 and we're here, the
2067 * iterator is exhausted.
2068 */
2069 if (!it->parent_stack->parent) {
2070 if (it->root_done) {
2071 PyErr_SetNone(PyExc_StopIteration);
2072 return NULL;
2073 } else {
2074 it->parent_stack = parent_stack_push_new(it->parent_stack,
2075 it->root_element);
2076 if (!it->parent_stack) {
2077 PyErr_NoMemory();
2078 return NULL;
2079 }
2080
2081 it->root_done = 1;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002082 rc = (it->sought_tag == Py_None);
2083 if (!rc) {
2084 rc = PyObject_RichCompareBool(it->root_element->tag,
2085 it->sought_tag, Py_EQ);
2086 if (rc < 0)
2087 return NULL;
2088 }
2089 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002090 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002091 PyObject *text = element_get_text(it->root_element);
2092 if (!text)
2093 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002094 rc = PyObject_IsTrue(text);
2095 if (rc < 0)
2096 return NULL;
2097 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002098 Py_INCREF(text);
2099 return text;
2100 }
2101 } else {
2102 Py_INCREF(it->root_element);
2103 return (PyObject *)it->root_element;
2104 }
2105 }
2106 }
2107 }
2108
2109 /* See if there are children left to traverse in the current parent. If
2110 * yes, visit the next child. If not, pop the stack and try again.
2111 */
Eli Bendersky113da642012-06-15 07:52:49 +03002112 cur_parent = it->parent_stack->parent;
2113 child_index = it->parent_stack->child_index;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002114 if (cur_parent->extra && child_index < cur_parent->extra->length) {
2115 ElementObject *child = (ElementObject *)
2116 cur_parent->extra->children[child_index];
2117 it->parent_stack->child_index++;
2118 it->parent_stack = parent_stack_push_new(it->parent_stack,
2119 child);
2120 if (!it->parent_stack) {
2121 PyErr_NoMemory();
2122 return NULL;
2123 }
2124
2125 if (it->gettext) {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002126 PyObject *text = element_get_text(child);
2127 if (!text)
2128 return NULL;
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002129 rc = PyObject_IsTrue(text);
2130 if (rc < 0)
2131 return NULL;
2132 if (rc) {
Eli Bendersky64d11e62012-06-15 07:42:50 +03002133 Py_INCREF(text);
2134 return text;
2135 }
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002136 } else {
2137 rc = (it->sought_tag == Py_None);
2138 if (!rc) {
2139 rc = PyObject_RichCompareBool(child->tag,
2140 it->sought_tag, Py_EQ);
2141 if (rc < 0)
2142 return NULL;
2143 }
2144 if (rc) {
2145 Py_INCREF(child);
2146 return (PyObject *)child;
2147 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002148 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002149 }
2150 else {
Eli Benderskye6174ca2013-01-10 06:27:53 -08002151 PyObject *tail;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002152 ParentLocator *next = it->parent_stack->next;
Eli Benderskye6174ca2013-01-10 06:27:53 -08002153 if (it->gettext) {
2154 tail = element_get_tail(cur_parent);
2155 if (!tail)
2156 return NULL;
2157 }
2158 else
2159 tail = Py_None;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002160 Py_XDECREF(it->parent_stack->parent);
2161 PyObject_Free(it->parent_stack);
2162 it->parent_stack = next;
2163
2164 /* Note that extra condition on it->parent_stack->parent here;
2165 * this is because itertext() is supposed to only return *inner*
2166 * text, not text following the element it began iteration with.
2167 */
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002168 if (it->parent_stack->parent) {
2169 rc = PyObject_IsTrue(tail);
2170 if (rc < 0)
2171 return NULL;
2172 if (rc) {
2173 Py_INCREF(tail);
2174 return tail;
2175 }
Eli Bendersky64d11e62012-06-15 07:42:50 +03002176 }
2177 }
2178 }
2179
2180 return NULL;
2181}
2182
2183
2184static PyTypeObject ElementIter_Type = {
2185 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002186 /* Using the module's name since the pure-Python implementation does not
2187 have such a type. */
Eli Bendersky64d11e62012-06-15 07:42:50 +03002188 "_elementtree._element_iterator", /* tp_name */
2189 sizeof(ElementIterObject), /* tp_basicsize */
2190 0, /* tp_itemsize */
2191 /* methods */
2192 (destructor)elementiter_dealloc, /* tp_dealloc */
2193 0, /* tp_print */
2194 0, /* tp_getattr */
2195 0, /* tp_setattr */
2196 0, /* tp_reserved */
2197 0, /* tp_repr */
2198 0, /* tp_as_number */
2199 0, /* tp_as_sequence */
2200 0, /* tp_as_mapping */
2201 0, /* tp_hash */
2202 0, /* tp_call */
2203 0, /* tp_str */
2204 0, /* tp_getattro */
2205 0, /* tp_setattro */
2206 0, /* tp_as_buffer */
2207 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2208 0, /* tp_doc */
2209 (traverseproc)elementiter_traverse, /* tp_traverse */
2210 0, /* tp_clear */
2211 0, /* tp_richcompare */
2212 0, /* tp_weaklistoffset */
2213 PyObject_SelfIter, /* tp_iter */
2214 (iternextfunc)elementiter_next, /* tp_iternext */
2215 0, /* tp_methods */
2216 0, /* tp_members */
2217 0, /* tp_getset */
2218 0, /* tp_base */
2219 0, /* tp_dict */
2220 0, /* tp_descr_get */
2221 0, /* tp_descr_set */
2222 0, /* tp_dictoffset */
2223 0, /* tp_init */
2224 0, /* tp_alloc */
2225 0, /* tp_new */
2226};
2227
2228
2229static PyObject *
2230create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2231{
2232 ElementIterObject *it;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002233
2234 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2235 if (!it)
2236 return NULL;
Eli Bendersky64d11e62012-06-15 07:42:50 +03002237
Serhiy Storchaka5bf31202015-05-18 18:29:33 +03002238 if (PyUnicode_Check(tag)) {
2239 if (PyUnicode_READY(tag) < 0)
2240 return NULL;
2241 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
2242 tag = Py_None;
2243 }
2244 else if (PyBytes_Check(tag)) {
2245 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
2246 tag = Py_None;
2247 }
Victor Stinner4d463432013-07-11 23:05:03 +02002248
2249 Py_INCREF(tag);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002250 it->sought_tag = tag;
2251 it->root_done = 0;
2252 it->gettext = gettext;
Victor Stinner4d463432013-07-11 23:05:03 +02002253 Py_INCREF(self);
Eli Bendersky64d11e62012-06-15 07:42:50 +03002254 it->root_element = self;
2255
Eli Bendersky64d11e62012-06-15 07:42:50 +03002256 PyObject_GC_Track(it);
Victor Stinnerd917dcb2013-07-12 02:05:17 +02002257
2258 it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
2259 if (it->parent_stack == NULL) {
2260 Py_DECREF(it);
2261 PyErr_NoMemory();
2262 return NULL;
2263 }
2264 it->parent_stack->parent = NULL;
2265 it->parent_stack->child_index = 0;
2266 it->parent_stack->next = NULL;
2267
Eli Bendersky64d11e62012-06-15 07:42:50 +03002268 return (PyObject *)it;
2269}
2270
2271
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002272/* ==================================================================== */
2273/* the tree builder type */
2274
2275typedef struct {
2276 PyObject_HEAD
2277
Eli Bendersky58d548d2012-05-29 15:45:16 +03002278 PyObject *root; /* root node (first created node) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002279
Antoine Pitrouee329312012-10-04 19:53:29 +02002280 PyObject *this; /* current node */
2281 PyObject *last; /* most recently created node */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002282
Eli Bendersky58d548d2012-05-29 15:45:16 +03002283 PyObject *data; /* data collector (string or list), or NULL */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002284
Eli Bendersky58d548d2012-05-29 15:45:16 +03002285 PyObject *stack; /* element stack */
2286 Py_ssize_t index; /* current stack size (0 means empty) */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002287
Eli Bendersky48d358b2012-05-30 17:57:50 +03002288 PyObject *element_factory;
2289
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002290 /* element tracing */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002291 PyObject *events; /* list of events, or NULL if not collecting */
2292 PyObject *start_event_obj; /* event objects (NULL to ignore) */
2293 PyObject *end_event_obj;
2294 PyObject *start_ns_event_obj;
2295 PyObject *end_ns_event_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002296} TreeBuilderObject;
2297
Christian Heimes90aa7642007-12-19 02:45:37 +00002298#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002299
2300/* -------------------------------------------------------------------- */
2301/* constructor and destructor */
2302
Eli Bendersky58d548d2012-05-29 15:45:16 +03002303static PyObject *
2304treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002305{
Eli Bendersky58d548d2012-05-29 15:45:16 +03002306 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2307 if (t != NULL) {
2308 t->root = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002309
Eli Bendersky58d548d2012-05-29 15:45:16 +03002310 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002311 t->this = Py_None;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002312 Py_INCREF(Py_None);
Antoine Pitrouee329312012-10-04 19:53:29 +02002313 t->last = Py_None;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002314
Eli Bendersky58d548d2012-05-29 15:45:16 +03002315 t->data = NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002316 t->element_factory = NULL;
Eli Bendersky58d548d2012-05-29 15:45:16 +03002317 t->stack = PyList_New(20);
2318 if (!t->stack) {
2319 Py_DECREF(t->this);
2320 Py_DECREF(t->last);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002321 Py_DECREF((PyObject *) t);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002322 return NULL;
2323 }
2324 t->index = 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002325
Eli Bendersky58d548d2012-05-29 15:45:16 +03002326 t->events = NULL;
2327 t->start_event_obj = t->end_event_obj = NULL;
2328 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2329 }
2330 return (PyObject *)t;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002331}
2332
Eli Bendersky58d548d2012-05-29 15:45:16 +03002333static int
2334treebuilder_init(PyObject *self, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002335{
Eli Benderskyc68e1362012-06-03 06:09:42 +03002336 static char *kwlist[] = {"element_factory", 0};
Eli Bendersky48d358b2012-05-30 17:57:50 +03002337 PyObject *element_factory = NULL;
2338 TreeBuilderObject *self_tb = (TreeBuilderObject *)self;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002339 PyObject *tmp;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002340
2341 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:TreeBuilder", kwlist,
2342 &element_factory)) {
2343 return -1;
2344 }
2345
2346 if (element_factory) {
2347 Py_INCREF(element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002348 tmp = self_tb->element_factory;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002349 self_tb->element_factory = element_factory;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002350 Py_XDECREF(tmp);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002351 }
2352
Eli Bendersky58d548d2012-05-29 15:45:16 +03002353 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002354}
2355
Eli Bendersky48d358b2012-05-30 17:57:50 +03002356static int
2357treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2358{
2359 Py_VISIT(self->root);
2360 Py_VISIT(self->this);
2361 Py_VISIT(self->last);
2362 Py_VISIT(self->data);
2363 Py_VISIT(self->stack);
2364 Py_VISIT(self->element_factory);
2365 return 0;
2366}
2367
2368static int
2369treebuilder_gc_clear(TreeBuilderObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002370{
Antoine Pitrouc1948842012-10-01 23:40:37 +02002371 Py_CLEAR(self->end_ns_event_obj);
2372 Py_CLEAR(self->start_ns_event_obj);
2373 Py_CLEAR(self->end_event_obj);
2374 Py_CLEAR(self->start_event_obj);
2375 Py_CLEAR(self->events);
2376 Py_CLEAR(self->stack);
2377 Py_CLEAR(self->data);
2378 Py_CLEAR(self->last);
2379 Py_CLEAR(self->this);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002380 Py_CLEAR(self->element_factory);
Antoine Pitrouc1948842012-10-01 23:40:37 +02002381 Py_CLEAR(self->root);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002382 return 0;
2383}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002384
Eli Bendersky48d358b2012-05-30 17:57:50 +03002385static void
2386treebuilder_dealloc(TreeBuilderObject *self)
2387{
2388 PyObject_GC_UnTrack(self);
2389 treebuilder_gc_clear(self);
Eli Bendersky58d548d2012-05-29 15:45:16 +03002390 Py_TYPE(self)->tp_free((PyObject *)self);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002391}
2392
2393/* -------------------------------------------------------------------- */
Antoine Pitrouee329312012-10-04 19:53:29 +02002394/* helpers for handling of arbitrary element-like objects */
2395
2396static int
2397treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
2398 PyObject **dest, _Py_Identifier *name)
2399{
2400 if (Element_CheckExact(element)) {
2401 Py_DECREF(JOIN_OBJ(*dest));
2402 *dest = JOIN_SET(data, PyList_CheckExact(data));
2403 return 0;
2404 }
2405 else {
2406 PyObject *joined = list_join(data);
2407 int r;
2408 if (joined == NULL)
2409 return -1;
2410 r = _PyObject_SetAttrId(element, name, joined);
2411 Py_DECREF(joined);
2412 return r;
2413 }
2414}
2415
2416/* These two functions steal a reference to data */
2417static int
2418treebuilder_set_element_text(PyObject *element, PyObject *data)
2419{
2420 _Py_IDENTIFIER(text);
2421 return treebuilder_set_element_text_or_tail(
2422 element, data, &((ElementObject *) element)->text, &PyId_text);
2423}
2424
2425static int
2426treebuilder_set_element_tail(PyObject *element, PyObject *data)
2427{
2428 _Py_IDENTIFIER(tail);
2429 return treebuilder_set_element_text_or_tail(
2430 element, data, &((ElementObject *) element)->tail, &PyId_tail);
2431}
2432
2433static int
2434treebuilder_add_subelement(PyObject *element, PyObject *child)
2435{
2436 _Py_IDENTIFIER(append);
2437 if (Element_CheckExact(element)) {
2438 ElementObject *elem = (ElementObject *) element;
2439 return element_add_subelement(elem, child);
2440 }
2441 else {
2442 PyObject *res;
2443 res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
2444 if (res == NULL)
2445 return -1;
2446 Py_DECREF(res);
2447 return 0;
2448 }
2449}
2450
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002451LOCAL(int)
2452treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2453 PyObject *node)
2454{
2455 if (action != NULL) {
2456 PyObject *res = PyTuple_Pack(2, action, node);
2457 if (res == NULL)
2458 return -1;
2459 if (PyList_Append(self->events, res) < 0) {
2460 Py_DECREF(res);
2461 return -1;
2462 }
2463 Py_DECREF(res);
2464 }
2465 return 0;
2466}
2467
Antoine Pitrouee329312012-10-04 19:53:29 +02002468/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002469/* handlers */
2470
2471LOCAL(PyObject*)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002472treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2473 PyObject* attrib)
2474{
2475 PyObject* node;
2476 PyObject* this;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002477 elementtreestate *st = ET_STATE_GLOBAL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002478
2479 if (self->data) {
2480 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002481 if (treebuilder_set_element_text(self->last, self->data))
2482 return NULL;
2483 }
2484 else {
2485 if (treebuilder_set_element_tail(self->last, self->data))
2486 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002487 }
2488 self->data = NULL;
2489 }
2490
Eli Bendersky08231a92013-05-18 15:47:16 -07002491 if (self->element_factory && self->element_factory != Py_None) {
Eli Bendersky48d358b2012-05-30 17:57:50 +03002492 node = PyObject_CallFunction(self->element_factory, "OO", tag, attrib);
2493 } else {
2494 node = create_new_element(tag, attrib);
2495 }
2496 if (!node) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002497 return NULL;
Eli Bendersky48d358b2012-05-30 17:57:50 +03002498 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002499
Antoine Pitrouee329312012-10-04 19:53:29 +02002500 this = self->this;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002501
2502 if (this != Py_None) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002503 if (treebuilder_add_subelement(this, node) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002504 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002505 } else {
2506 if (self->root) {
2507 PyErr_SetString(
Eli Bendersky532d03e2013-08-10 08:00:39 -07002508 st->parseerror_obj,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002509 "multiple elements on top level"
2510 );
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002511 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002512 }
2513 Py_INCREF(node);
2514 self->root = node;
2515 }
2516
2517 if (self->index < PyList_GET_SIZE(self->stack)) {
2518 if (PyList_SetItem(self->stack, self->index, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002519 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002520 Py_INCREF(this);
2521 } else {
2522 if (PyList_Append(self->stack, this) < 0)
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002523 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002524 }
2525 self->index++;
2526
2527 Py_DECREF(this);
2528 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002529 self->this = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002530
2531 Py_DECREF(self->last);
2532 Py_INCREF(node);
Antoine Pitrouee329312012-10-04 19:53:29 +02002533 self->last = node;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002534
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002535 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2536 goto error;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002537
2538 return node;
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00002539
2540 error:
2541 Py_DECREF(node);
2542 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002543}
2544
2545LOCAL(PyObject*)
2546treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2547{
2548 if (!self->data) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002549 if (self->last == Py_None) {
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00002550 /* ignore calls to data before the first call to start */
2551 Py_RETURN_NONE;
2552 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002553 /* store the first item as is */
2554 Py_INCREF(data); self->data = data;
2555 } else {
2556 /* more than one item; use a list to collect items */
Christian Heimes72b710a2008-05-26 13:28:38 +00002557 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2558 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002559 /* XXX this code path unused in Python 3? */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002560 /* expat often generates single character data sections; handle
2561 the most common case by resizing the existing string... */
Christian Heimes72b710a2008-05-26 13:28:38 +00002562 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2563 if (_PyBytes_Resize(&self->data, size + 1) < 0)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002564 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +00002565 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002566 } else if (PyList_CheckExact(self->data)) {
2567 if (PyList_Append(self->data, data) < 0)
2568 return NULL;
2569 } else {
2570 PyObject* list = PyList_New(2);
2571 if (!list)
2572 return NULL;
2573 PyList_SET_ITEM(list, 0, self->data);
2574 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2575 self->data = list;
2576 }
2577 }
2578
2579 Py_RETURN_NONE;
2580}
2581
2582LOCAL(PyObject*)
2583treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2584{
2585 PyObject* item;
2586
2587 if (self->data) {
2588 if (self->this == self->last) {
Antoine Pitrouee329312012-10-04 19:53:29 +02002589 if (treebuilder_set_element_text(self->last, self->data))
2590 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002591 } else {
Antoine Pitrouee329312012-10-04 19:53:29 +02002592 if (treebuilder_set_element_tail(self->last, self->data))
2593 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002594 }
2595 self->data = NULL;
2596 }
2597
2598 if (self->index == 0) {
2599 PyErr_SetString(
2600 PyExc_IndexError,
2601 "pop from empty stack"
2602 );
2603 return NULL;
2604 }
2605
2606 self->index--;
2607
2608 item = PyList_GET_ITEM(self->stack, self->index);
2609 Py_INCREF(item);
2610
2611 Py_DECREF(self->last);
2612
Antoine Pitrouee329312012-10-04 19:53:29 +02002613 self->last = self->this;
2614 self->this = item;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002615
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02002616 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2617 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002618
2619 Py_INCREF(self->last);
2620 return (PyObject*) self->last;
2621}
2622
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002623/* -------------------------------------------------------------------- */
2624/* methods (in alphabetical order) */
2625
2626static PyObject*
2627treebuilder_data(TreeBuilderObject* self, PyObject* args)
2628{
2629 PyObject* data;
2630 if (!PyArg_ParseTuple(args, "O:data", &data))
2631 return NULL;
2632
2633 return treebuilder_handle_data(self, data);
2634}
2635
2636static PyObject*
2637treebuilder_end(TreeBuilderObject* self, PyObject* args)
2638{
2639 PyObject* tag;
2640 if (!PyArg_ParseTuple(args, "O:end", &tag))
2641 return NULL;
2642
2643 return treebuilder_handle_end(self, tag);
2644}
2645
2646LOCAL(PyObject*)
2647treebuilder_done(TreeBuilderObject* self)
2648{
2649 PyObject* res;
2650
2651 /* FIXME: check stack size? */
2652
2653 if (self->root)
2654 res = self->root;
2655 else
2656 res = Py_None;
2657
2658 Py_INCREF(res);
2659 return res;
2660}
2661
2662static PyObject*
2663treebuilder_close(TreeBuilderObject* self, PyObject* args)
2664{
2665 if (!PyArg_ParseTuple(args, ":close"))
2666 return NULL;
2667
2668 return treebuilder_done(self);
2669}
2670
2671static PyObject*
2672treebuilder_start(TreeBuilderObject* self, PyObject* args)
2673{
2674 PyObject* tag;
2675 PyObject* attrib = Py_None;
2676 if (!PyArg_ParseTuple(args, "O|O:start", &tag, &attrib))
2677 return NULL;
2678
2679 return treebuilder_handle_start(self, tag, attrib);
2680}
2681
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002682static PyMethodDef treebuilder_methods[] = {
2683 {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
2684 {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
2685 {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002686 {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
2687 {NULL, NULL}
2688};
2689
Neal Norwitz227b5332006-03-22 09:28:35 +00002690static PyTypeObject TreeBuilder_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002691 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08002692 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002693 /* methods */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002694 (destructor)treebuilder_dealloc, /* tp_dealloc */
2695 0, /* tp_print */
2696 0, /* tp_getattr */
2697 0, /* tp_setattr */
2698 0, /* tp_reserved */
2699 0, /* tp_repr */
2700 0, /* tp_as_number */
2701 0, /* tp_as_sequence */
2702 0, /* tp_as_mapping */
2703 0, /* tp_hash */
2704 0, /* tp_call */
2705 0, /* tp_str */
2706 0, /* tp_getattro */
2707 0, /* tp_setattro */
2708 0, /* tp_as_buffer */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002709 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2710 /* tp_flags */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002711 0, /* tp_doc */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002712 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
2713 (inquiry)treebuilder_gc_clear, /* tp_clear */
Eli Bendersky58d548d2012-05-29 15:45:16 +03002714 0, /* tp_richcompare */
2715 0, /* tp_weaklistoffset */
2716 0, /* tp_iter */
2717 0, /* tp_iternext */
2718 treebuilder_methods, /* tp_methods */
2719 0, /* tp_members */
2720 0, /* tp_getset */
2721 0, /* tp_base */
2722 0, /* tp_dict */
2723 0, /* tp_descr_get */
2724 0, /* tp_descr_set */
2725 0, /* tp_dictoffset */
2726 (initproc)treebuilder_init, /* tp_init */
2727 PyType_GenericAlloc, /* tp_alloc */
2728 treebuilder_new, /* tp_new */
2729 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002730};
2731
2732/* ==================================================================== */
2733/* the expat interface */
2734
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002735#include "expat.h"
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002736#include "pyexpat.h"
Eli Bendersky532d03e2013-08-10 08:00:39 -07002737
2738/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
2739 * cached globally without being in per-module state.
2740 */
Eli Bendersky20d41742012-06-01 09:48:37 +03002741static struct PyExpat_CAPI *expat_capi;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002742#define EXPAT(func) (expat_capi->func)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002743
Eli Bendersky52467b12012-06-01 07:13:08 +03002744static XML_Memory_Handling_Suite ExpatMemoryHandler = {
2745 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
2746
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002747typedef struct {
2748 PyObject_HEAD
2749
2750 XML_Parser parser;
2751
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002752 PyObject *target;
2753 PyObject *entity;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002754
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002755 PyObject *names;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002756
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002757 PyObject *handle_start;
2758 PyObject *handle_data;
2759 PyObject *handle_end;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002760
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002761 PyObject *handle_comment;
2762 PyObject *handle_pi;
2763 PyObject *handle_doctype;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002764
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002765 PyObject *handle_close;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002766
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002767} XMLParserObject;
2768
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03002769static PyObject* xmlparser_doctype(XMLParserObject* self, PyObject* args);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03002770
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002771/* helpers */
2772
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002773LOCAL(PyObject*)
2774makeuniversal(XMLParserObject* self, const char* string)
2775{
2776 /* convert a UTF-8 tag/attribute name from the expat parser
2777 to a universal name string */
2778
Antoine Pitrouc1948842012-10-01 23:40:37 +02002779 Py_ssize_t size = (Py_ssize_t) strlen(string);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002780 PyObject* key;
2781 PyObject* value;
2782
2783 /* look the 'raw' name up in the names dictionary */
Christian Heimes72b710a2008-05-26 13:28:38 +00002784 key = PyBytes_FromStringAndSize(string, size);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002785 if (!key)
2786 return NULL;
2787
2788 value = PyDict_GetItem(self->names, key);
2789
2790 if (value) {
2791 Py_INCREF(value);
2792 } else {
2793 /* new name. convert to universal name, and decode as
2794 necessary */
2795
2796 PyObject* tag;
2797 char* p;
Antoine Pitrouc1948842012-10-01 23:40:37 +02002798 Py_ssize_t i;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002799
2800 /* look for namespace separator */
2801 for (i = 0; i < size; i++)
2802 if (string[i] == '}')
2803 break;
2804 if (i != size) {
2805 /* convert to universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002806 tag = PyBytes_FromStringAndSize(NULL, size+1);
Victor Stinner71c8b7e2013-07-11 23:08:39 +02002807 if (tag == NULL) {
2808 Py_DECREF(key);
2809 return NULL;
2810 }
Christian Heimes72b710a2008-05-26 13:28:38 +00002811 p = PyBytes_AS_STRING(tag);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002812 p[0] = '{';
2813 memcpy(p+1, string, size);
2814 size++;
2815 } else {
2816 /* plain name; use key as tag */
2817 Py_INCREF(key);
2818 tag = key;
2819 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01002820
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002821 /* decode universal name */
Christian Heimes72b710a2008-05-26 13:28:38 +00002822 p = PyBytes_AS_STRING(tag);
Neal Norwitz0269b912007-08-08 06:56:02 +00002823 value = PyUnicode_DecodeUTF8(p, size, "strict");
2824 Py_DECREF(tag);
2825 if (!value) {
2826 Py_DECREF(key);
2827 return NULL;
2828 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002829
2830 /* add to names dictionary */
2831 if (PyDict_SetItem(self->names, key, value) < 0) {
2832 Py_DECREF(key);
2833 Py_DECREF(value);
2834 return NULL;
2835 }
2836 }
2837
2838 Py_DECREF(key);
2839 return value;
2840}
2841
Eli Bendersky5b77d812012-03-16 08:20:05 +02002842/* Set the ParseError exception with the given parameters.
2843 * If message is not NULL, it's used as the error string. Otherwise, the
2844 * message string is the default for the given error_code.
2845*/
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002846static void
Eli Bendersky5b77d812012-03-16 08:20:05 +02002847expat_set_error(enum XML_Error error_code, int line, int column, char *message)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002848{
Eli Bendersky5b77d812012-03-16 08:20:05 +02002849 PyObject *errmsg, *error, *position, *code;
Eli Bendersky532d03e2013-08-10 08:00:39 -07002850 elementtreestate *st = ET_STATE_GLOBAL;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002851
Victor Stinner499dfcf2011-03-21 13:26:24 +01002852 errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
Eli Bendersky5b77d812012-03-16 08:20:05 +02002853 message ? message : EXPAT(ErrorString)(error_code),
2854 line, column);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002855 if (errmsg == NULL)
2856 return;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002857
Eli Bendersky532d03e2013-08-10 08:00:39 -07002858 error = PyObject_CallFunction(st->parseerror_obj, "O", errmsg);
Victor Stinner499dfcf2011-03-21 13:26:24 +01002859 Py_DECREF(errmsg);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002860 if (!error)
2861 return;
2862
Eli Bendersky5b77d812012-03-16 08:20:05 +02002863 /* Add code and position attributes */
2864 code = PyLong_FromLong((long)error_code);
2865 if (!code) {
2866 Py_DECREF(error);
2867 return;
2868 }
2869 if (PyObject_SetAttrString(error, "code", code) == -1) {
2870 Py_DECREF(error);
2871 Py_DECREF(code);
2872 return;
2873 }
2874 Py_DECREF(code);
2875
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002876 position = Py_BuildValue("(ii)", line, column);
2877 if (!position) {
2878 Py_DECREF(error);
2879 return;
2880 }
2881 if (PyObject_SetAttrString(error, "position", position) == -1) {
2882 Py_DECREF(error);
2883 Py_DECREF(position);
2884 return;
2885 }
2886 Py_DECREF(position);
2887
Eli Bendersky532d03e2013-08-10 08:00:39 -07002888 PyErr_SetObject(st->parseerror_obj, error);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002889 Py_DECREF(error);
2890}
2891
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002892/* -------------------------------------------------------------------- */
2893/* handlers */
2894
2895static void
2896expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
2897 int data_len)
2898{
2899 PyObject* key;
2900 PyObject* value;
2901 PyObject* res;
2902
2903 if (data_len < 2 || data_in[0] != '&')
2904 return;
2905
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002906 if (PyErr_Occurred())
2907 return;
2908
Neal Norwitz0269b912007-08-08 06:56:02 +00002909 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002910 if (!key)
2911 return;
2912
2913 value = PyDict_GetItem(self->entity, key);
2914
2915 if (value) {
2916 if (TreeBuilder_CheckExact(self->target))
2917 res = treebuilder_handle_data(
2918 (TreeBuilderObject*) self->target, value
2919 );
2920 else if (self->handle_data)
2921 res = PyObject_CallFunction(self->handle_data, "O", value);
2922 else
2923 res = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002924 Py_XDECREF(res);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002925 } else if (!PyErr_Occurred()) {
2926 /* Report the first error, not the last */
Alexander Belopolskye239d232010-12-08 23:31:48 +00002927 char message[128] = "undefined entity ";
2928 strncat(message, data_in, data_len < 100?data_len:100);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002929 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02002930 XML_ERROR_UNDEFINED_ENTITY,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002931 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02002932 EXPAT(GetErrorColumnNumber)(self->parser),
2933 message
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002934 );
2935 }
2936
2937 Py_DECREF(key);
2938}
2939
2940static void
2941expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
2942 const XML_Char **attrib_in)
2943{
2944 PyObject* res;
2945 PyObject* tag;
2946 PyObject* attrib;
2947 int ok;
2948
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02002949 if (PyErr_Occurred())
2950 return;
2951
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002952 /* tag name */
2953 tag = makeuniversal(self, tag_in);
2954 if (!tag)
2955 return; /* parser will look for errors */
2956
2957 /* attributes */
2958 if (attrib_in[0]) {
2959 attrib = PyDict_New();
2960 if (!attrib)
2961 return;
2962 while (attrib_in[0] && attrib_in[1]) {
2963 PyObject* key = makeuniversal(self, attrib_in[0]);
Neal Norwitz0269b912007-08-08 06:56:02 +00002964 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002965 if (!key || !value) {
2966 Py_XDECREF(value);
2967 Py_XDECREF(key);
2968 Py_DECREF(attrib);
2969 return;
2970 }
2971 ok = PyDict_SetItem(attrib, key, value);
2972 Py_DECREF(value);
2973 Py_DECREF(key);
2974 if (ok < 0) {
2975 Py_DECREF(attrib);
2976 return;
2977 }
2978 attrib_in += 2;
2979 }
2980 } else {
Antoine Pitrouc1948842012-10-01 23:40:37 +02002981 /* Pass an empty dictionary on */
Eli Bendersky48d358b2012-05-30 17:57:50 +03002982 attrib = PyDict_New();
2983 if (!attrib)
2984 return;
2985 }
2986
2987 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002988 /* shortcut */
2989 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
2990 tag, attrib);
Eli Bendersky48d358b2012-05-30 17:57:50 +03002991 }
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002992 else if (self->handle_start) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002993 res = PyObject_CallFunction(self->handle_start, "OO", tag, attrib);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002994 } else
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00002995 res = NULL;
2996
2997 Py_DECREF(tag);
2998 Py_DECREF(attrib);
2999
3000 Py_XDECREF(res);
3001}
3002
3003static void
3004expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3005 int data_len)
3006{
3007 PyObject* data;
3008 PyObject* res;
3009
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003010 if (PyErr_Occurred())
3011 return;
3012
Neal Norwitz0269b912007-08-08 06:56:02 +00003013 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003014 if (!data)
3015 return; /* parser will look for errors */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003016
3017 if (TreeBuilder_CheckExact(self->target))
3018 /* shortcut */
3019 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3020 else if (self->handle_data)
3021 res = PyObject_CallFunction(self->handle_data, "O", data);
3022 else
3023 res = NULL;
3024
3025 Py_DECREF(data);
3026
3027 Py_XDECREF(res);
3028}
3029
3030static void
3031expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3032{
3033 PyObject* tag;
3034 PyObject* res = NULL;
3035
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003036 if (PyErr_Occurred())
3037 return;
3038
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003039 if (TreeBuilder_CheckExact(self->target))
3040 /* shortcut */
3041 /* the standard tree builder doesn't look at the end tag */
3042 res = treebuilder_handle_end(
3043 (TreeBuilderObject*) self->target, Py_None
3044 );
3045 else if (self->handle_end) {
3046 tag = makeuniversal(self, tag_in);
3047 if (tag) {
3048 res = PyObject_CallFunction(self->handle_end, "O", tag);
3049 Py_DECREF(tag);
3050 }
3051 }
3052
3053 Py_XDECREF(res);
3054}
3055
3056static void
3057expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix,
3058 const XML_Char *uri)
3059{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003060 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3061 PyObject *parcel;
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003062
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003063 if (PyErr_Occurred())
3064 return;
3065
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003066 if (!target->events || !target->start_ns_event_obj)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003067 return;
3068
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003069 if (!uri)
3070 uri = "";
3071 if (!prefix)
3072 prefix = "";
3073
3074 parcel = Py_BuildValue("ss", prefix, uri);
3075 if (!parcel)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003076 return;
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003077 treebuilder_append_event(target, target->start_ns_event_obj, parcel);
3078 Py_DECREF(parcel);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003079}
3080
3081static void
3082expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3083{
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003084 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3085
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003086 if (PyErr_Occurred())
3087 return;
3088
Serhiy Storchaka7efaf952015-12-06 23:51:44 +02003089 if (!target->events)
3090 return;
3091
3092 treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003093}
3094
3095static void
3096expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3097{
3098 PyObject* comment;
3099 PyObject* res;
3100
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003101 if (PyErr_Occurred())
3102 return;
3103
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003104 if (self->handle_comment) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003105 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003106 if (comment) {
3107 res = PyObject_CallFunction(self->handle_comment, "O", comment);
3108 Py_XDECREF(res);
3109 Py_DECREF(comment);
3110 }
3111 }
3112}
3113
Eli Bendersky45839902013-01-13 05:14:47 -08003114static void
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003115expat_start_doctype_handler(XMLParserObject *self,
3116 const XML_Char *doctype_name,
3117 const XML_Char *sysid,
3118 const XML_Char *pubid,
3119 int has_internal_subset)
3120{
3121 PyObject *self_pyobj = (PyObject *)self;
3122 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3123 PyObject *parser_doctype = NULL;
3124 PyObject *res = NULL;
3125
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003126 if (PyErr_Occurred())
3127 return;
3128
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003129 doctype_name_obj = makeuniversal(self, doctype_name);
3130 if (!doctype_name_obj)
3131 return;
3132
3133 if (sysid) {
3134 sysid_obj = makeuniversal(self, sysid);
3135 if (!sysid_obj) {
3136 Py_DECREF(doctype_name_obj);
3137 return;
3138 }
3139 } else {
3140 Py_INCREF(Py_None);
3141 sysid_obj = Py_None;
3142 }
3143
3144 if (pubid) {
3145 pubid_obj = makeuniversal(self, pubid);
3146 if (!pubid_obj) {
3147 Py_DECREF(doctype_name_obj);
3148 Py_DECREF(sysid_obj);
3149 return;
3150 }
3151 } else {
3152 Py_INCREF(Py_None);
3153 pubid_obj = Py_None;
3154 }
3155
3156 /* If the target has a handler for doctype, call it. */
3157 if (self->handle_doctype) {
3158 res = PyObject_CallFunction(self->handle_doctype, "OOO",
3159 doctype_name_obj, pubid_obj, sysid_obj);
3160 Py_CLEAR(res);
3161 }
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003162 else {
3163 /* Now see if the parser itself has a doctype method. If yes and it's
3164 * a custom method, call it but warn about deprecation. If it's only
3165 * the vanilla XMLParser method, do nothing.
3166 */
3167 parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
3168 if (parser_doctype &&
3169 !(PyCFunction_Check(parser_doctype) &&
3170 PyCFunction_GET_SELF(parser_doctype) == self_pyobj &&
3171 PyCFunction_GET_FUNCTION(parser_doctype) ==
3172 (PyCFunction) xmlparser_doctype)) {
3173 res = xmlparser_doctype(self, NULL);
3174 if (!res)
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003175 goto clear;
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003176 Py_DECREF(res);
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003177 res = PyObject_CallFunction(parser_doctype, "OOO",
3178 doctype_name_obj, pubid_obj, sysid_obj);
3179 Py_CLEAR(res);
3180 }
3181 }
3182
3183clear:
3184 Py_XDECREF(parser_doctype);
3185 Py_DECREF(doctype_name_obj);
3186 Py_DECREF(pubid_obj);
3187 Py_DECREF(sysid_obj);
3188}
3189
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003190static void
3191expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3192 const XML_Char* data_in)
3193{
3194 PyObject* target;
3195 PyObject* data;
3196 PyObject* res;
3197
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003198 if (PyErr_Occurred())
3199 return;
3200
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003201 if (self->handle_pi) {
Neal Norwitz0269b912007-08-08 06:56:02 +00003202 target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3203 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003204 if (target && data) {
3205 res = PyObject_CallFunction(self->handle_pi, "OO", target, data);
3206 Py_XDECREF(res);
3207 Py_DECREF(data);
3208 Py_DECREF(target);
3209 } else {
3210 Py_XDECREF(data);
3211 Py_XDECREF(target);
3212 }
3213 }
3214}
3215
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003216/* -------------------------------------------------------------------- */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003217
Eli Bendersky52467b12012-06-01 07:13:08 +03003218static PyObject *
3219xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003220{
Eli Bendersky52467b12012-06-01 07:13:08 +03003221 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3222 if (self) {
3223 self->parser = NULL;
3224 self->target = self->entity = self->names = NULL;
3225 self->handle_start = self->handle_data = self->handle_end = NULL;
3226 self->handle_comment = self->handle_pi = self->handle_close = NULL;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003227 self->handle_doctype = NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003228 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003229 return (PyObject *)self;
3230}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003231
Eli Bendersky52467b12012-06-01 07:13:08 +03003232static int
3233xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
3234{
3235 XMLParserObject *self_xp = (XMLParserObject *)self;
3236 PyObject *target = NULL, *html = NULL;
3237 char *encoding = NULL;
Eli Benderskyc68e1362012-06-03 06:09:42 +03003238 static char *kwlist[] = {"html", "target", "encoding", 0};
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003239
Eli Bendersky52467b12012-06-01 07:13:08 +03003240 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
3241 &html, &target, &encoding)) {
3242 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003243 }
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003244
Eli Bendersky52467b12012-06-01 07:13:08 +03003245 self_xp->entity = PyDict_New();
3246 if (!self_xp->entity)
3247 return -1;
3248
3249 self_xp->names = PyDict_New();
3250 if (!self_xp->names) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003251 Py_CLEAR(self_xp->entity);
Eli Bendersky52467b12012-06-01 07:13:08 +03003252 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003253 }
3254
Eli Bendersky52467b12012-06-01 07:13:08 +03003255 self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3256 if (!self_xp->parser) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003257 Py_CLEAR(self_xp->entity);
3258 Py_CLEAR(self_xp->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003259 PyErr_NoMemory();
Eli Bendersky52467b12012-06-01 07:13:08 +03003260 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003261 }
3262
Eli Bendersky52467b12012-06-01 07:13:08 +03003263 if (target) {
3264 Py_INCREF(target);
3265 } else {
Eli Bendersky58d548d2012-05-29 15:45:16 +03003266 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003267 if (!target) {
Antoine Pitrouc1948842012-10-01 23:40:37 +02003268 Py_CLEAR(self_xp->entity);
3269 Py_CLEAR(self_xp->names);
Eli Bendersky52467b12012-06-01 07:13:08 +03003270 EXPAT(ParserFree)(self_xp->parser);
3271 return -1;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003272 }
Eli Bendersky52467b12012-06-01 07:13:08 +03003273 }
3274 self_xp->target = target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003275
Eli Bendersky52467b12012-06-01 07:13:08 +03003276 self_xp->handle_start = PyObject_GetAttrString(target, "start");
3277 self_xp->handle_data = PyObject_GetAttrString(target, "data");
3278 self_xp->handle_end = PyObject_GetAttrString(target, "end");
3279 self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
3280 self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
3281 self_xp->handle_close = PyObject_GetAttrString(target, "close");
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003282 self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003283
3284 PyErr_Clear();
Eli Bendersky45839902013-01-13 05:14:47 -08003285
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003286 /* configure parser */
Eli Bendersky52467b12012-06-01 07:13:08 +03003287 EXPAT(SetUserData)(self_xp->parser, self_xp);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003288 EXPAT(SetElementHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003289 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003290 (XML_StartElementHandler) expat_start_handler,
3291 (XML_EndElementHandler) expat_end_handler
3292 );
3293 EXPAT(SetDefaultHandlerExpand)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003294 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003295 (XML_DefaultHandler) expat_default_handler
3296 );
3297 EXPAT(SetCharacterDataHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003298 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003299 (XML_CharacterDataHandler) expat_data_handler
3300 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003301 if (self_xp->handle_comment)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003302 EXPAT(SetCommentHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003303 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003304 (XML_CommentHandler) expat_comment_handler
3305 );
Eli Bendersky52467b12012-06-01 07:13:08 +03003306 if (self_xp->handle_pi)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003307 EXPAT(SetProcessingInstructionHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003308 self_xp->parser,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003309 (XML_ProcessingInstructionHandler) expat_pi_handler
3310 );
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003311 EXPAT(SetStartDoctypeDeclHandler)(
3312 self_xp->parser,
3313 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3314 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003315 EXPAT(SetUnknownEncodingHandler)(
Eli Bendersky52467b12012-06-01 07:13:08 +03003316 self_xp->parser,
Eli Bendersky6dc32b32013-05-25 05:25:48 -07003317 EXPAT(DefaultUnknownEncodingHandler), NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003318 );
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003319
Eli Bendersky52467b12012-06-01 07:13:08 +03003320 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003321}
3322
Eli Bendersky52467b12012-06-01 07:13:08 +03003323static int
3324xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3325{
3326 Py_VISIT(self->handle_close);
3327 Py_VISIT(self->handle_pi);
3328 Py_VISIT(self->handle_comment);
3329 Py_VISIT(self->handle_end);
3330 Py_VISIT(self->handle_data);
3331 Py_VISIT(self->handle_start);
3332
3333 Py_VISIT(self->target);
3334 Py_VISIT(self->entity);
3335 Py_VISIT(self->names);
3336
3337 return 0;
3338}
3339
3340static int
3341xmlparser_gc_clear(XMLParserObject *self)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003342{
3343 EXPAT(ParserFree)(self->parser);
3344
Antoine Pitrouc1948842012-10-01 23:40:37 +02003345 Py_CLEAR(self->handle_close);
3346 Py_CLEAR(self->handle_pi);
3347 Py_CLEAR(self->handle_comment);
3348 Py_CLEAR(self->handle_end);
3349 Py_CLEAR(self->handle_data);
3350 Py_CLEAR(self->handle_start);
3351 Py_CLEAR(self->handle_doctype);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003352
Antoine Pitrouc1948842012-10-01 23:40:37 +02003353 Py_CLEAR(self->target);
3354 Py_CLEAR(self->entity);
3355 Py_CLEAR(self->names);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003356
Eli Bendersky52467b12012-06-01 07:13:08 +03003357 return 0;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003358}
3359
Eli Bendersky52467b12012-06-01 07:13:08 +03003360static void
3361xmlparser_dealloc(XMLParserObject* self)
3362{
3363 PyObject_GC_UnTrack(self);
3364 xmlparser_gc_clear(self);
3365 Py_TYPE(self)->tp_free((PyObject *)self);
3366}
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003367
3368LOCAL(PyObject*)
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003369expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003370{
3371 int ok;
3372
Victor Stinner3fd8cbd2013-07-18 22:46:14 +02003373 assert(!PyErr_Occurred());
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003374 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3375
3376 if (PyErr_Occurred())
3377 return NULL;
3378
3379 if (!ok) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003380 expat_set_error(
Eli Bendersky5b77d812012-03-16 08:20:05 +02003381 EXPAT(GetErrorCode)(self->parser),
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003382 EXPAT(GetErrorLineNumber)(self->parser),
Eli Bendersky5b77d812012-03-16 08:20:05 +02003383 EXPAT(GetErrorColumnNumber)(self->parser),
3384 NULL
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003385 );
3386 return NULL;
3387 }
3388
3389 Py_RETURN_NONE;
3390}
3391
3392static PyObject*
3393xmlparser_close(XMLParserObject* self, PyObject* args)
3394{
3395 /* end feeding data to parser */
3396
3397 PyObject* res;
3398 if (!PyArg_ParseTuple(args, ":close"))
3399 return NULL;
3400
3401 res = expat_parse(self, "", 0, 1);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003402 if (!res)
3403 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003404
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003405 if (TreeBuilder_CheckExact(self->target)) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003406 Py_DECREF(res);
3407 return treebuilder_done((TreeBuilderObject*) self->target);
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003408 }
3409 else if (self->handle_close) {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003410 Py_DECREF(res);
3411 return PyObject_CallFunction(self->handle_close, "");
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003412 }
3413 else {
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003414 return res;
Eli Bendersky6eb50b12013-08-24 15:17:08 -07003415 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003416}
3417
3418static PyObject*
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003419xmlparser_feed(XMLParserObject* self, PyObject* arg)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003420{
3421 /* feed data to parser */
3422
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003423 if (PyUnicode_Check(arg)) {
3424 Py_ssize_t data_len;
3425 const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
3426 if (data == NULL)
3427 return NULL;
3428 if (data_len > INT_MAX) {
3429 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3430 return NULL;
3431 }
3432 /* Explicitly set UTF-8 encoding. Return code ignored. */
3433 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3434 return expat_parse(self, data, (int)data_len, 0);
3435 }
3436 else {
3437 Py_buffer view;
3438 PyObject *res;
3439 if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
3440 return NULL;
3441 if (view.len > INT_MAX) {
3442 PyBuffer_Release(&view);
3443 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3444 return NULL;
3445 }
3446 res = expat_parse(self, view.buf, (int)view.len, 0);
3447 PyBuffer_Release(&view);
3448 return res;
3449 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003450}
3451
3452static PyObject*
Eli Benderskya3699232013-05-19 18:47:23 -07003453xmlparser_parse_whole(XMLParserObject* self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003454{
Eli Benderskya3699232013-05-19 18:47:23 -07003455 /* (internal) parse the whole input, until end of stream */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003456 PyObject* reader;
3457 PyObject* buffer;
Eli Benderskyf996e772012-03-16 05:53:30 +02003458 PyObject* temp;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003459 PyObject* res;
3460
3461 PyObject* fileobj;
3462 if (!PyArg_ParseTuple(args, "O:_parse", &fileobj))
3463 return NULL;
3464
3465 reader = PyObject_GetAttrString(fileobj, "read");
3466 if (!reader)
3467 return NULL;
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003468
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003469 /* read from open file object */
3470 for (;;) {
3471
3472 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3473
3474 if (!buffer) {
3475 /* read failed (e.g. due to KeyboardInterrupt) */
3476 Py_DECREF(reader);
3477 return NULL;
3478 }
3479
Eli Benderskyf996e772012-03-16 05:53:30 +02003480 if (PyUnicode_CheckExact(buffer)) {
3481 /* A unicode object is encoded into bytes using UTF-8 */
Victor Stinner59799a82013-11-13 14:17:30 +01003482 if (PyUnicode_GET_LENGTH(buffer) == 0) {
Eli Benderskyf996e772012-03-16 05:53:30 +02003483 Py_DECREF(buffer);
3484 break;
3485 }
3486 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
Antoine Pitrouc1948842012-10-01 23:40:37 +02003487 Py_DECREF(buffer);
Eli Benderskyf996e772012-03-16 05:53:30 +02003488 if (!temp) {
3489 /* Propagate exception from PyUnicode_AsEncodedString */
Eli Benderskyf996e772012-03-16 05:53:30 +02003490 Py_DECREF(reader);
3491 return NULL;
3492 }
Eli Benderskyf996e772012-03-16 05:53:30 +02003493 buffer = temp;
3494 }
3495 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003496 Py_DECREF(buffer);
3497 break;
3498 }
3499
Serhiy Storchaka097a6642015-11-25 20:12:37 +02003500 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3501 Py_DECREF(buffer);
3502 Py_DECREF(reader);
3503 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3504 return NULL;
3505 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003506 res = expat_parse(
Serhiy Storchaka097a6642015-11-25 20:12:37 +02003507 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003508 );
3509
3510 Py_DECREF(buffer);
3511
3512 if (!res) {
3513 Py_DECREF(reader);
3514 return NULL;
3515 }
3516 Py_DECREF(res);
3517
3518 }
3519
3520 Py_DECREF(reader);
3521
3522 res = expat_parse(self, "", 0, 1);
3523
3524 if (res && TreeBuilder_CheckExact(self->target)) {
3525 Py_DECREF(res);
3526 return treebuilder_done((TreeBuilderObject*) self->target);
3527 }
3528
3529 return res;
3530}
3531
3532static PyObject*
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003533xmlparser_doctype(XMLParserObject *self, PyObject *args)
3534{
Serhiy Storchaka05744ac2015-06-29 22:35:58 +03003535 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3536 "This method of XMLParser is deprecated. Define"
3537 " doctype() method on the TreeBuilder target.",
3538 1) < 0) {
3539 return NULL;
3540 }
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003541 Py_RETURN_NONE;
3542}
3543
3544static PyObject*
3545xmlparser_setevents(XMLParserObject *self, PyObject* args)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003546{
3547 /* activate element event reporting */
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003548 Py_ssize_t i, seqlen;
3549 TreeBuilderObject *target;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003550
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003551 PyObject *events_queue;
3552 PyObject *events_to_report = Py_None;
3553 PyObject *events_seq;
3554 if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
3555 &events_to_report))
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003556 return NULL;
3557
3558 if (!TreeBuilder_CheckExact(self->target)) {
3559 PyErr_SetString(
3560 PyExc_TypeError,
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003561 "event handling only supported for ElementTree.TreeBuilder "
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003562 "targets"
3563 );
3564 return NULL;
3565 }
3566
3567 target = (TreeBuilderObject*) self->target;
3568
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003569 Py_INCREF(events_queue);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003570 Py_XDECREF(target->events);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003571 target->events = events_queue;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003572
3573 /* clear out existing events */
Antoine Pitrouc1948842012-10-01 23:40:37 +02003574 Py_CLEAR(target->start_event_obj);
3575 Py_CLEAR(target->end_event_obj);
3576 Py_CLEAR(target->start_ns_event_obj);
3577 Py_CLEAR(target->end_ns_event_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003578
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003579 if (events_to_report == Py_None) {
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003580 /* default is "end" only */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003581 target->end_event_obj = PyUnicode_FromString("end");
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003582 Py_RETURN_NONE;
3583 }
3584
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003585 if (!(events_seq = PySequence_Fast(events_to_report,
3586 "events must be a sequence"))) {
3587 return NULL;
3588 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003589
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003590 seqlen = PySequence_Size(events_seq);
3591 for (i = 0; i < seqlen; ++i) {
3592 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
3593 char *event_name = NULL;
3594 if (PyUnicode_Check(event_name_obj)) {
3595 event_name = _PyUnicode_AsString(event_name_obj);
3596 } else if (PyBytes_Check(event_name_obj)) {
3597 event_name = PyBytes_AS_STRING(event_name_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003598 }
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003599
3600 if (event_name == NULL) {
3601 Py_DECREF(events_seq);
3602 PyErr_Format(PyExc_ValueError, "invalid events sequence");
3603 return NULL;
3604 } else if (strcmp(event_name, "start") == 0) {
3605 Py_INCREF(event_name_obj);
3606 target->start_event_obj = event_name_obj;
3607 } else if (strcmp(event_name, "end") == 0) {
3608 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003609 Py_XDECREF(target->end_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003610 target->end_event_obj = event_name_obj;
3611 } else if (strcmp(event_name, "start-ns") == 0) {
3612 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003613 Py_XDECREF(target->start_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003614 target->start_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003615 EXPAT(SetNamespaceDeclHandler)(
3616 self->parser,
3617 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3618 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3619 );
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003620 } else if (strcmp(event_name, "end-ns") == 0) {
3621 Py_INCREF(event_name_obj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003622 Py_XDECREF(target->end_ns_event_obj);
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003623 target->end_ns_event_obj = event_name_obj;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003624 EXPAT(SetNamespaceDeclHandler)(
3625 self->parser,
3626 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3627 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3628 );
3629 } else {
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003630 Py_DECREF(events_seq);
3631 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003632 return NULL;
3633 }
3634 }
3635
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07003636 Py_DECREF(events_seq);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003637 Py_RETURN_NONE;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003638}
3639
3640static PyMethodDef xmlparser_methods[] = {
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03003641 {"feed", (PyCFunction) xmlparser_feed, METH_O},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003642 {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
Eli Benderskya3699232013-05-19 18:47:23 -07003643 {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003644 {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03003645 {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003646 {NULL, NULL}
3647};
3648
Victor Stinnerbfc7bf02011-03-21 13:23:42 +01003649static PyObject*
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003650xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003651{
Alexander Belopolskye239d232010-12-08 23:31:48 +00003652 if (PyUnicode_Check(nameobj)) {
3653 PyObject* res;
3654 if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3655 res = self->entity;
3656 else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3657 res = self->target;
3658 else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3659 return PyUnicode_FromFormat(
3660 "Expat %d.%d.%d", XML_MAJOR_VERSION,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003661 XML_MINOR_VERSION, XML_MICRO_VERSION);
Alexander Belopolskye239d232010-12-08 23:31:48 +00003662 }
3663 else
3664 goto generic;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003665
Alexander Belopolskye239d232010-12-08 23:31:48 +00003666 Py_INCREF(res);
3667 return res;
3668 }
3669 generic:
3670 return PyObject_GenericGetAttr((PyObject*) self, nameobj);
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003671}
3672
Neal Norwitz227b5332006-03-22 09:28:35 +00003673static PyTypeObject XMLParser_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003674 PyVarObject_HEAD_INIT(NULL, 0)
Eli Bendersky698bdb22013-01-10 06:01:06 -08003675 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003676 /* methods */
Eli Bendersky52467b12012-06-01 07:13:08 +03003677 (destructor)xmlparser_dealloc, /* tp_dealloc */
3678 0, /* tp_print */
3679 0, /* tp_getattr */
3680 0, /* tp_setattr */
3681 0, /* tp_reserved */
3682 0, /* tp_repr */
3683 0, /* tp_as_number */
3684 0, /* tp_as_sequence */
3685 0, /* tp_as_mapping */
3686 0, /* tp_hash */
3687 0, /* tp_call */
3688 0, /* tp_str */
3689 (getattrofunc)xmlparser_getattro, /* tp_getattro */
3690 0, /* tp_setattro */
3691 0, /* tp_as_buffer */
3692 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3693 /* tp_flags */
3694 0, /* tp_doc */
3695 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
3696 (inquiry)xmlparser_gc_clear, /* tp_clear */
3697 0, /* tp_richcompare */
3698 0, /* tp_weaklistoffset */
3699 0, /* tp_iter */
3700 0, /* tp_iternext */
3701 xmlparser_methods, /* tp_methods */
3702 0, /* tp_members */
3703 0, /* tp_getset */
3704 0, /* tp_base */
3705 0, /* tp_dict */
3706 0, /* tp_descr_get */
3707 0, /* tp_descr_set */
3708 0, /* tp_dictoffset */
3709 (initproc)xmlparser_init, /* tp_init */
3710 PyType_GenericAlloc, /* tp_alloc */
3711 xmlparser_new, /* tp_new */
3712 0, /* tp_free */
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003713};
3714
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003715/* ==================================================================== */
3716/* python module interface */
3717
3718static PyMethodDef _functions[] = {
Eli Benderskya8736902013-01-05 06:26:39 -08003719 {"SubElement", (PyCFunction) subelement, METH_VARARGS | METH_KEYWORDS},
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003720 {NULL, NULL}
3721};
3722
Martin v. Löwis1a214512008-06-11 05:26:20 +00003723
Eli Bendersky532d03e2013-08-10 08:00:39 -07003724static struct PyModuleDef elementtreemodule = {
3725 PyModuleDef_HEAD_INIT,
3726 "_elementtree",
3727 NULL,
3728 sizeof(elementtreestate),
3729 _functions,
3730 NULL,
3731 elementtree_traverse,
3732 elementtree_clear,
3733 elementtree_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00003734};
3735
Neal Norwitzf6657e62006-12-28 04:47:50 +00003736PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00003737PyInit__elementtree(void)
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003738{
Eli Bendersky64d11e62012-06-15 07:42:50 +03003739 PyObject *m, *temp;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003740 elementtreestate *st;
3741
3742 m = PyState_FindModule(&elementtreemodule);
3743 if (m) {
3744 Py_INCREF(m);
3745 return m;
3746 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003747
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003748 /* Initialize object types */
Ronald Oussoren138d0802013-07-19 11:11:25 +02003749 if (PyType_Ready(&ElementIter_Type) < 0)
3750 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003751 if (PyType_Ready(&TreeBuilder_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003752 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003753 if (PyType_Ready(&Element_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003754 return NULL;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00003755 if (PyType_Ready(&XMLParser_Type) < 0)
Alexander Belopolskyf0f45142010-08-11 17:31:17 +00003756 return NULL;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003757
Eli Bendersky532d03e2013-08-10 08:00:39 -07003758 m = PyModule_Create(&elementtreemodule);
Fredrik Lundh44ed4db2006-03-12 21:06:35 +00003759 if (!m)
Martin v. Löwis1a214512008-06-11 05:26:20 +00003760 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003761 st = ET_STATE(m);
Martin v. Löwis1a214512008-06-11 05:26:20 +00003762
Eli Bendersky828efde2012-04-05 05:40:58 +03003763 if (!(temp = PyImport_ImportModule("copy")))
3764 return NULL;
Eli Bendersky532d03e2013-08-10 08:00:39 -07003765 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
Eli Bendersky828efde2012-04-05 05:40:58 +03003766 Py_XDECREF(temp);
3767
Eli Bendersky532d03e2013-08-10 08:00:39 -07003768 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
Eli Bendersky828efde2012-04-05 05:40:58 +03003769 return NULL;
3770
Eli Bendersky20d41742012-06-01 09:48:37 +03003771 /* link against pyexpat */
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003772 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
3773 if (expat_capi) {
3774 /* check that it's usable */
3775 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
3776 expat_capi->size < sizeof(struct PyExpat_CAPI) ||
3777 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
3778 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
Eli Bendersky52467b12012-06-01 07:13:08 +03003779 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
Eli Benderskyef391ac2012-07-21 20:28:46 +03003780 PyErr_SetString(PyExc_ImportError,
3781 "pyexpat version is incompatible");
3782 return NULL;
Eli Bendersky52467b12012-06-01 07:13:08 +03003783 }
Eli Benderskyef391ac2012-07-21 20:28:46 +03003784 } else {
Eli Bendersky52467b12012-06-01 07:13:08 +03003785 return NULL;
Eli Benderskyef391ac2012-07-21 20:28:46 +03003786 }
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003787
Eli Bendersky532d03e2013-08-10 08:00:39 -07003788 st->parseerror_obj = PyErr_NewException(
Florent Xiclunaa72a98f2012-02-13 11:03:30 +01003789 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003790 );
Eli Bendersky532d03e2013-08-10 08:00:39 -07003791 Py_INCREF(st->parseerror_obj);
3792 PyModule_AddObject(m, "ParseError", st->parseerror_obj);
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003793
Eli Bendersky092af1f2012-03-04 07:14:03 +02003794 Py_INCREF((PyObject *)&Element_Type);
3795 PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
3796
Eli Bendersky58d548d2012-05-29 15:45:16 +03003797 Py_INCREF((PyObject *)&TreeBuilder_Type);
3798 PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
3799
Eli Bendersky52467b12012-06-01 07:13:08 +03003800 Py_INCREF((PyObject *)&XMLParser_Type);
3801 PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
Eli Bendersky52467b12012-06-01 07:13:08 +03003802
Florent Xiclunaf15351d2010-03-13 23:24:31 +00003803 return m;
Fredrik Lundh8c8836b2005-12-16 22:06:06 +00003804}